[llvm] 580210a - [SLP] Convert some tests to opaque pointers (NFC)

Nikita Popov via llvm-commits <llvm-commits@lists.llvm.org>
Fri Dec 23 01:03:28 PST 2022


Author: Nikita Popov
Date: 2022-12-23T10:02:57+01:00
New Revision: 580210a0c938531ef9fd79f9ffedb93eeb2e66c2

URL: https://github.com/llvm/llvm-project/commit/580210a0c938531ef9fd79f9ffedb93eeb2e66c2
DIFF: https://github.com/llvm/llvm-project/commit/580210a0c938531ef9fd79f9ffedb93eeb2e66c2.diff

LOG: [SLP] Convert some tests to opaque pointers (NFC)
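
This is a mechanical update: typed pointer types such as float* and <4 x float>*
are replaced with the opaque ptr type in both the test IR and the autogenerated
CHECK lines. As the hunks below show, pointer bitcasts and zero-index
getelementptrs become redundant under opaque pointers and are dropped. A minimal
before/after sketch of the pattern (the function name is illustrative, not taken
from any one test):

  ; Before: typed pointers carry an element type, so casts between
  ; pointer types and i64-0 GEPs show up in the IR.
  define float @load_second(float* %p) {
    %g = getelementptr inbounds float, float* %p, i64 1
    %v = load float, float* %g, align 4
    ret float %v
  }

  ; After: a single opaque ptr type; the element type lives on the
  ; memory operation itself, so the casts disappear.
  define float @load_second(ptr %p) {
    %g = getelementptr inbounds float, ptr %p, i64 1
    %v = load float, ptr %g, align 4
    ret float %v
  }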

Added: 
    

Modified: 
    llvm/test/Transforms/SLPVectorizer/AArch64/64-bit-vector.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/commute.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/gather-load-min-required-vf-2.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/invalid_type.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/load-store-q.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/matmul.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks-in-loops.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/minimum-sizes.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/mul.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/nontemporal.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/remarks.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/slp-and-reduction.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/slp-or-reduction.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/slp-xor-reduction.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-order.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/splat-loads.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s352.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll
    llvm/test/Transforms/SLPVectorizer/AMDGPU/address-space-ptr-sze-gep-index-assert.ll
    llvm/test/Transforms/SLPVectorizer/AMDGPU/horizontal-store.ll
    llvm/test/Transforms/SLPVectorizer/AMDGPU/packed-math.ll
    llvm/test/Transforms/SLPVectorizer/AMDGPU/slp-v2f32.ll
    llvm/test/Transforms/SLPVectorizer/ARM/memory.ll
    llvm/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll
    llvm/test/Transforms/SLPVectorizer/PowerPC/aggregate.ll
    llvm/test/Transforms/SLPVectorizer/PowerPC/pr27897.ll
    llvm/test/Transforms/SLPVectorizer/PowerPC/short-to-double.ll
    llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll
    llvm/test/Transforms/SLPVectorizer/SystemZ/SLP-cmp-cost-query.ll
    llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
    llvm/test/Transforms/SLPVectorizer/VE/disable_slp.ll
    llvm/test/Transforms/SLPVectorizer/WebAssembly/no-vectorize-rotate.ll
    llvm/test/Transforms/SLPVectorizer/X86/PR31847.ll
    llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll
    llvm/test/Transforms/SLPVectorizer/X86/PR34635.ll
    llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll
    llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll
    llvm/test/Transforms/SLPVectorizer/X86/PR36280.ll
    llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll
    llvm/test/Transforms/SLPVectorizer/X86/addsub.ll
    llvm/test/Transforms/SLPVectorizer/X86/aggregate.ll
    llvm/test/Transforms/SLPVectorizer/X86/align.ll
    llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred.ll
    llvm/test/Transforms/SLPVectorizer/X86/arith-abs.ll
    llvm/test/Transforms/SLPVectorizer/X86/arith-add-saddo.ll
    llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll
    llvm/test/Transforms/SLPVectorizer/X86/arith-add-uaddo.ll
    llvm/test/Transforms/SLPVectorizer/X86/arith-add-usat.ll
    llvm/test/Transforms/SLPVectorizer/X86/arith-add.ll
    llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll
    llvm/test/Transforms/SLPVectorizer/X86/arith-fix.ll
    llvm/test/Transforms/SLPVectorizer/X86/arith-mul-smulo.ll
    llvm/test/Transforms/SLPVectorizer/X86/arith-mul-umulo.ll
    llvm/test/Transforms/SLPVectorizer/X86/arith-mul.ll
    llvm/test/Transforms/SLPVectorizer/X86/arith-smax.ll
    llvm/test/Transforms/SLPVectorizer/X86/arith-smin.ll
    llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll
    llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssubo.ll
    llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usat.ll
    llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usubo.ll
    llvm/test/Transforms/SLPVectorizer/X86/arith-sub.ll
    llvm/test/Transforms/SLPVectorizer/X86/arith-umax.ll
    llvm/test/Transforms/SLPVectorizer/X86/arith-umin.ll
    llvm/test/Transforms/SLPVectorizer/X86/atomics.ll
    llvm/test/Transforms/SLPVectorizer/X86/bad_types.ll
    llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
    llvm/test/Transforms/SLPVectorizer/X86/bitreverse.ll
    llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll
    llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll
    llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll
    llvm/test/Transforms/SLPVectorizer/X86/bswap.ll
    llvm/test/Transforms/SLPVectorizer/X86/call.ll
    llvm/test/Transforms/SLPVectorizer/X86/cast.ll
    llvm/test/Transforms/SLPVectorizer/X86/catchswitch.ll
    llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
    llvm/test/Transforms/SLPVectorizer/X86/cmp_commute-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/X86/cmp_commute.ll
    llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll
    llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll
    llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
    llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll
    llvm/test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll
    llvm/test/Transforms/SLPVectorizer/X86/control-dependence.ll
    llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll
    llvm/test/Transforms/SLPVectorizer/X86/crash_binaryop.ll
    llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll
    llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
    llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll
    llvm/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll
    llvm/test/Transforms/SLPVectorizer/X86/crash_gep.ll
    llvm/test/Transforms/SLPVectorizer/X86/crash_lencod-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
    llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
    llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll
    llvm/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll
    llvm/test/Transforms/SLPVectorizer/X86/cse_extractelement.ll
    llvm/test/Transforms/SLPVectorizer/X86/ctlz.ll
    llvm/test/Transforms/SLPVectorizer/X86/ctpop.ll
    llvm/test/Transforms/SLPVectorizer/X86/cttz.ll
    llvm/test/Transforms/SLPVectorizer/X86/cycle_dup.ll
    llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll
    llvm/test/Transforms/SLPVectorizer/X86/diamond.ll
    llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast.ll
    llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast_extra_shuffle.ll
    llvm/test/Transforms/SLPVectorizer/X86/different-vec-widths.ll
    llvm/test/Transforms/SLPVectorizer/X86/dot-product.ll
    llvm/test/Transforms/SLPVectorizer/X86/external_user.ll
    llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load.ll
    llvm/test/Transforms/SLPVectorizer/X86/extract.ll
    llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
    llvm/test/Transforms/SLPVectorizer/X86/extract_with_non_const_index.ll
    llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll
    llvm/test/Transforms/SLPVectorizer/X86/extractelement-insertpoint.ll
    llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll
    llvm/test/Transforms/SLPVectorizer/X86/fabs.ll
    llvm/test/Transforms/SLPVectorizer/X86/fcopysign.ll
    llvm/test/Transforms/SLPVectorizer/X86/flag.ll
    llvm/test/Transforms/SLPVectorizer/X86/float-min-max.ll
    llvm/test/Transforms/SLPVectorizer/X86/fma.ll
    llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll
    llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll
    llvm/test/Transforms/SLPVectorizer/X86/fmuladd.ll
    llvm/test/Transforms/SLPVectorizer/X86/fptosi-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/X86/fptosi.ll
    llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll
    llvm/test/Transforms/SLPVectorizer/X86/fround.ll
    llvm/test/Transforms/SLPVectorizer/X86/funclet.ll
    llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll
    llvm/test/Transforms/SLPVectorizer/X86/gep.ll
    llvm/test/Transforms/SLPVectorizer/X86/gep_mismatch.ll
    llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll
    llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/X86/hadd.ll
    llvm/test/Transforms/SLPVectorizer/X86/hoist.ll
    llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
    llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
    llvm/test/Transforms/SLPVectorizer/X86/horizontal-smax.ll
    llvm/test/Transforms/SLPVectorizer/X86/implicitfloat.ll
    llvm/test/Transforms/SLPVectorizer/X86/in-tree-user.ll
    llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll
    llvm/test/Transforms/SLPVectorizer/X86/insert-crash-index.ll
    llvm/test/Transforms/SLPVectorizer/X86/insert-element-multiple-uses.ll
    llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll
    llvm/test/Transforms/SLPVectorizer/X86/insertelement-postpone.ll
    llvm/test/Transforms/SLPVectorizer/X86/insertvalue.ll
    llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll
    llvm/test/Transforms/SLPVectorizer/X86/intrinsic.ll
    llvm/test/Transforms/SLPVectorizer/X86/intrinsic_with_scalar_param.ll
    llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
    llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-shuffle-placement.ll
    llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll
    llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll
    llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll
    llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll
    llvm/test/Transforms/SLPVectorizer/X86/limit.ll
    llvm/test/Transforms/SLPVectorizer/X86/load-bitcast-vec.ll
    llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll
    llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll
    llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
    llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
    llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll
    llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll
    llvm/test/Transforms/SLPVectorizer/X86/metadata.ll
    llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll
    llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll
    llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
    llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll
    llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll
    llvm/test/Transforms/SLPVectorizer/X86/opt.ll
    llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll
    llvm/test/Transforms/SLPVectorizer/X86/ordering.ll
    llvm/test/Transforms/SLPVectorizer/X86/partail.ll
    llvm/test/Transforms/SLPVectorizer/X86/phi.ll
    llvm/test/Transforms/SLPVectorizer/X86/phi3.ll
    llvm/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll
    llvm/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll
    llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll
    llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll
    llvm/test/Transforms/SLPVectorizer/X86/pr16628.ll
    llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll
    llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll
    llvm/test/Transforms/SLPVectorizer/X86/pr23510.ll
    llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll
    llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll
    llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll
    llvm/test/Transforms/SLPVectorizer/X86/pr44067-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll
    llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll
    llvm/test/Transforms/SLPVectorizer/X86/pr47623.ll
    llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll
    llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll
    llvm/test/Transforms/SLPVectorizer/X86/pr52275.ll
    llvm/test/Transforms/SLPVectorizer/X86/pr54465.ll
    llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
    llvm/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll
    llvm/test/Transforms/SLPVectorizer/X86/reduction.ll
    llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
    llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
    llvm/test/Transforms/SLPVectorizer/X86/redux-feed-buildvector.ll
    llvm/test/Transforms/SLPVectorizer/X86/redux-feed-insertelement.ll
    llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
    llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
    llvm/test/Transforms/SLPVectorizer/X86/remark_listcost.ll
    llvm/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll
    llvm/test/Transforms/SLPVectorizer/X86/remark_unsupported.ll
    llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll
    llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll
    llvm/test/Transforms/SLPVectorizer/X86/reorder_phi.ll
    llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll
    llvm/test/Transforms/SLPVectorizer/X86/reorder_with_reordered_users.ll
    llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll
    llvm/test/Transforms/SLPVectorizer/X86/resched.ll
    llvm/test/Transforms/SLPVectorizer/X86/return.ll
    llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll
    llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll
    llvm/test/Transforms/SLPVectorizer/X86/reverse_extract_elements.ll
    llvm/test/Transforms/SLPVectorizer/X86/rgb_phi.ll
    llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll
    llvm/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll
    llvm/test/Transforms/SLPVectorizer/X86/schedule_budget.ll
    llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll
    llvm/test/Transforms/SLPVectorizer/X86/several_store_chains.ll
    llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/X86/sext.ll
    llvm/test/Transforms/SLPVectorizer/X86/shift-ashr.ll
    llvm/test/Transforms/SLPVectorizer/X86/shift-lshr.ll
    llvm/test/Transforms/SLPVectorizer/X86/shift-shl.ll
    llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll
    llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll
    llvm/test/Transforms/SLPVectorizer/X86/shuffled-gathers-diff-size.ll
    llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll
    llvm/test/Transforms/SLPVectorizer/X86/simplebb.ll
    llvm/test/Transforms/SLPVectorizer/X86/sin-sqrt.ll
    llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll
    llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll
    llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll
    llvm/test/Transforms/SLPVectorizer/X86/split-load8_2_unord_geps.ll
    llvm/test/Transforms/SLPVectorizer/X86/sqrt.ll
    llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll
    llvm/test/Transforms/SLPVectorizer/X86/store-jumbled.ll
    llvm/test/Transforms/SLPVectorizer/X86/stores-non-ordered.ll
    llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll
    llvm/test/Transforms/SLPVectorizer/X86/supernode.ll
    llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
    llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll
    llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll
    llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll
    llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll
    llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll
    llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll
    llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
    llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll
    llvm/test/Transforms/SLPVectorizer/X86/vectorize-cmps.ll
    llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-alt-shuffle.ll
    llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
    llvm/test/Transforms/SLPVectorizer/X86/vectorize-reordered-list.ll
    llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll
    llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/X86/zext.ll
    llvm/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll
    llvm/test/Transforms/SLPVectorizer/int_sideeffect.ll
    llvm/test/Transforms/SLPVectorizer/reschedule.ll
    llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll
    llvm/test/Transforms/SLPVectorizer/slp-max-reg-size.ll
    llvm/test/Transforms/SLPVectorizer/vectorizable-functions-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/vectorizable-functions.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/64-bit-vector.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/64-bit-vector.ll
index 5b3aa13b58903..bce087caeff5a 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/64-bit-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/64-bit-vector.ll
@@ -5,39 +5,31 @@
 ; RUN: opt -S -passes=slp-vectorizer -mtriple=aarch64--linux-gnu -mcpu=kryo < %s | FileCheck --check-prefix=NO_SLP %s
 ; RUN: opt -S -passes=slp-vectorizer -mtriple=aarch64--linux-gnu -mcpu=generic -slp-min-reg-size=128 < %s | FileCheck --check-prefix=NO_SLP %s
 
-define void @f(float* %r, float* %w) {
+define void @f(ptr %r, ptr %w) {
 ; CHECK-LABEL: @f(
-; CHECK-NEXT:    [[R0:%.*]] = getelementptr inbounds float, float* [[R:%.*]], i64 0
-; CHECK-NEXT:    [[W0:%.*]] = getelementptr inbounds float, float* [[W:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[R0]] to <2 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[R:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x float> [[TMP2]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[W0]] to <2 x float>*
-; CHECK-NEXT:    store <2 x float> [[TMP3]], <2 x float>* [[TMP4]], align 4
+; CHECK-NEXT:    store <2 x float> [[TMP3]], ptr [[W:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 ; NO_SLP-LABEL: @f(
-; NO_SLP-NEXT:    [[R0:%.*]] = getelementptr inbounds float, float* [[R:%.*]], i64 0
-; NO_SLP-NEXT:    [[R1:%.*]] = getelementptr inbounds float, float* [[R]], i64 1
-; NO_SLP-NEXT:    [[F0:%.*]] = load float, float* [[R0]], align 4
-; NO_SLP-NEXT:    [[F1:%.*]] = load float, float* [[R1]], align 4
+; NO_SLP-NEXT:    [[R1:%.*]] = getelementptr inbounds float, ptr [[R:%.*]], i64 1
+; NO_SLP-NEXT:    [[F0:%.*]] = load float, ptr [[R]], align 4
+; NO_SLP-NEXT:    [[F1:%.*]] = load float, ptr [[R1]], align 4
 ; NO_SLP-NEXT:    [[ADD0:%.*]] = fadd float [[F0]], [[F0]]
 ; NO_SLP-NEXT:    [[ADD1:%.*]] = fadd float [[F1]], [[F1]]
-; NO_SLP-NEXT:    [[W0:%.*]] = getelementptr inbounds float, float* [[W:%.*]], i64 0
-; NO_SLP-NEXT:    [[W1:%.*]] = getelementptr inbounds float, float* [[W]], i64 1
-; NO_SLP-NEXT:    store float [[ADD0]], float* [[W0]], align 4
-; NO_SLP-NEXT:    store float [[ADD1]], float* [[W1]], align 4
+; NO_SLP-NEXT:    [[W1:%.*]] = getelementptr inbounds float, ptr [[W:%.*]], i64 1
+; NO_SLP-NEXT:    store float [[ADD0]], ptr [[W]], align 4
+; NO_SLP-NEXT:    store float [[ADD1]], ptr [[W1]], align 4
 ; NO_SLP-NEXT:    ret void
 ;
-  %r0 = getelementptr inbounds float, float* %r, i64 0
-  %r1 = getelementptr inbounds float, float* %r, i64 1
-  %f0 = load float, float* %r0
-  %f1 = load float, float* %r1
+  %r1 = getelementptr inbounds float, ptr %r, i64 1
+  %f0 = load float, ptr %r
+  %f1 = load float, ptr %r1
   %add0 = fadd float %f0, %f0
   %add1 = fadd float %f1, %f1
-  %w0 = getelementptr inbounds float, float* %w, i64 0
-  %w1 = getelementptr inbounds float, float* %w, i64 1
-  store float %add0, float* %w0
-  store float %add1, float* %w1
+  %w1 = getelementptr inbounds float, ptr %w, i64 1
+  store float %add0, ptr %w
+  store float %add1, ptr %w1
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll
index 74782cc84a15a..6bea2554c6f96 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll
@@ -1,44 +1,39 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes=slp-vectorizer -S -mtriple=aarch64-apple-ios -mcpu=cyclone -o - %s | FileCheck %s
 
-define void @f1(<2 x i16> %x, i16* %a) {
+define void @f1(<2 x i16> %x, ptr %a) {
 ; CHECK-LABEL: @f1(
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[X:%.*]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
-; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i16> [[X]], i32 0
-; CHECK-NEXT:    store i16 [[TMP1]], i16* [[A:%.*]], align 2
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>*
-; CHECK-NEXT:    store <4 x i16> [[SHUFFLE]], <4 x i16>* [[TMP2]], align 2
+; CHECK-NEXT:    store i16 [[TMP1]], ptr [[A:%.*]], align 2
+; CHECK-NEXT:    store <4 x i16> [[SHUFFLE]], ptr undef, align 2
 ; CHECK-NEXT:    ret void
 ;
   %t2 = extractelement <2 x i16> %x, i32 0
   %t3 = extractelement <2 x i16> %x, i32 1
-  %ptr0 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
-  %ptr1 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
-  %ptr2 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
-  %ptr3 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
-  store i16 %t2, i16* %a
-  store i16 %t2, i16* %ptr0
-  store i16 %t3, i16* %ptr1
-  store i16 %t3, i16* %ptr2
-  store i16 %t2, i16* %ptr3
+  %ptr1 = getelementptr inbounds [4 x i16], ptr undef, i16 0, i16 1
+  %ptr2 = getelementptr inbounds [4 x i16], ptr undef, i16 0, i16 2
+  %ptr3 = getelementptr inbounds [4 x i16], ptr undef, i16 0, i16 3
+  store i16 %t2, ptr %a
+  store i16 %t2, ptr undef
+  store i16 %t3, ptr %ptr1
+  store i16 %t3, ptr %ptr2
+  store i16 %t2, ptr %ptr3
   ret void
 }
 
-define void @f2(<2 x i16> %x, i16* %a) {
+define void @f2(<2 x i16> %x, ptr %a) {
 ; CHECK-LABEL: @f2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[CONT:%.*]]
 ; CHECK:       cont:
 ; CHECK-NEXT:    [[XX:%.*]] = phi <2 x i16> [ [[X:%.*]], [[ENTRY:%.*]] ], [ undef, [[CONT]] ]
-; CHECK-NEXT:    [[AA:%.*]] = phi i16* [ [[A:%.*]], [[ENTRY]] ], [ undef, [[CONT]] ]
+; CHECK-NEXT:    [[AA:%.*]] = phi ptr [ [[A:%.*]], [[ENTRY]] ], [ undef, [[CONT]] ]
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[XX]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
-; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i16> [[XX]], i32 0
-; CHECK-NEXT:    store i16 [[TMP0]], i16* [[A]], align 2
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>*
-; CHECK-NEXT:    store <4 x i16> [[SHUFFLE]], <4 x i16>* [[TMP1]], align 2
-; CHECK-NEXT:    [[A_VAL:%.*]] = load i16, i16* [[A]], align 2
+; CHECK-NEXT:    store i16 [[TMP0]], ptr [[A]], align 2
+; CHECK-NEXT:    store <4 x i16> [[SHUFFLE]], ptr undef, align 2
+; CHECK-NEXT:    [[A_VAL:%.*]] = load i16, ptr [[A]], align 2
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i16 [[A_VAL]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[CONT]], label [[EXIT:%.*]]
 ; CHECK:       exit:
@@ -49,19 +44,18 @@ entry:
 
 cont:                                           ; preds = %entry, %cont
   %xx = phi <2 x i16> [ %x, %entry ], [ undef, %cont ]
-  %aa = phi i16* [ %a, %entry ], [ undef, %cont ]
+  %aa = phi ptr [ %a, %entry ], [ undef, %cont ]
   %t2 = extractelement <2 x i16> %xx, i32 0
   %t3 = extractelement <2 x i16> %xx, i32 1
-  %ptr0 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
-  %ptr1 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
-  %ptr2 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
-  %ptr3 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
-  store i16 %t2, i16* %a
-  store i16 %t2, i16* %ptr0
-  store i16 %t3, i16* %ptr1
-  store i16 %t3, i16* %ptr2
-  store i16 %t2, i16* %ptr3
-  %a_val = load i16, i16* %a, align 2
+  %ptr1 = getelementptr inbounds [4 x i16], ptr undef, i16 0, i16 1
+  %ptr2 = getelementptr inbounds [4 x i16], ptr undef, i16 0, i16 2
+  %ptr3 = getelementptr inbounds [4 x i16], ptr undef, i16 0, i16 3
+  store i16 %t2, ptr %a
+  store i16 %t2, ptr undef
+  store i16 %t3, ptr %ptr1
+  store i16 %t3, ptr %ptr2
+  store i16 %t2, ptr %ptr3
+  %a_val = load i16, ptr %a, align 2
   %cmp = icmp eq i16 %a_val, 0
   br i1 %cmp, label %cont, label %exit
 
@@ -69,20 +63,18 @@ exit:                                           ; preds = %cont
   ret void
 }
 
-define void @f3(<2 x i16> %x, i16* %a) {
+define void @f3(<2 x i16> %x, ptr %a) {
 ; CHECK-LABEL: @f3(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[CONT:%.*]]
 ; CHECK:       cont:
 ; CHECK-NEXT:    [[XX:%.*]] = phi <2 x i16> [ [[X:%.*]], [[ENTRY:%.*]] ], [ undef, [[CONT]] ]
-; CHECK-NEXT:    [[AA:%.*]] = phi i16* [ [[A:%.*]], [[ENTRY]] ], [ undef, [[CONT]] ]
+; CHECK-NEXT:    [[AA:%.*]] = phi ptr [ [[A:%.*]], [[ENTRY]] ], [ undef, [[CONT]] ]
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[XX]], <2 x i16> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 1>
-; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i16> [[XX]], i32 1
-; CHECK-NEXT:    store i16 [[TMP0]], i16* [[A]], align 2
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>*
-; CHECK-NEXT:    store <4 x i16> [[SHUFFLE]], <4 x i16>* [[TMP1]], align 2
-; CHECK-NEXT:    [[A_VAL:%.*]] = load i16, i16* [[A]], align 2
+; CHECK-NEXT:    store i16 [[TMP0]], ptr [[A]], align 2
+; CHECK-NEXT:    store <4 x i16> [[SHUFFLE]], ptr undef, align 2
+; CHECK-NEXT:    [[A_VAL:%.*]] = load i16, ptr [[A]], align 2
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i16 [[A_VAL]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[CONT]], label [[EXIT:%.*]]
 ; CHECK:       exit:
@@ -93,19 +85,18 @@ entry:
 
 cont:                                           ; preds = %entry, %cont
   %xx = phi <2 x i16> [ %x, %entry ], [ undef, %cont ]
-  %aa = phi i16* [ %a, %entry ], [ undef, %cont ]
+  %aa = phi ptr [ %a, %entry ], [ undef, %cont ]
   %t2 = extractelement <2 x i16> %xx, i32 0
   %t3 = extractelement <2 x i16> %xx, i32 1
-  %ptr0 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
-  %ptr1 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
-  %ptr2 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
-  %ptr3 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
-  store i16 %t3, i16* %a
-  store i16 %t3, i16* %ptr0
-  store i16 %t2, i16* %ptr1
-  store i16 %t2, i16* %ptr2
-  store i16 %t3, i16* %ptr3
-  %a_val = load i16, i16* %a, align 2
+  %ptr1 = getelementptr inbounds [4 x i16], ptr undef, i16 0, i16 1
+  %ptr2 = getelementptr inbounds [4 x i16], ptr undef, i16 0, i16 2
+  %ptr3 = getelementptr inbounds [4 x i16], ptr undef, i16 0, i16 3
+  store i16 %t3, ptr %a
+  store i16 %t3, ptr undef
+  store i16 %t2, ptr %ptr1
+  store i16 %t2, ptr %ptr2
+  store i16 %t3, ptr %ptr3
+  %a_val = load i16, ptr %a, align 2
   %cmp = icmp eq i16 %a_val, 0
   br i1 %cmp, label %cont, label %exit
 

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll
index 1cb1413cf2985..5e8616673bbcd 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll
@@ -8,16 +8,16 @@ target triple = "arm64-apple-ios14.0.0"
 declare float @llvm.sin.f32(float)
 
 ; Accelerate provides sin() for <4 x float>
-define <4 x float> @int_sin_4x(<4 x float>* %a) {
+define <4 x float> @int_sin_4x(ptr %a) {
 ; CHECK-LABEL: @int_sin_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vsinf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @int_sin_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
@@ -34,7 +34,7 @@ define <4 x float> @int_sin_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @llvm.sin.f32(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -52,21 +52,21 @@ entry:
 
 declare float @ceilf(float) readonly nounwind willreturn
 
-define <4 x float> @ceil_4x(<4 x float>* %a) {
+define <4 x float> @ceil_4x(ptr %a) {
 ; CHECK-LABEL: @ceil_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @ceil_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]])
 ; NOACCELERATE-NEXT:    ret <4 x float> [[TMP1]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @ceilf(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -84,21 +84,21 @@ entry:
 
 declare float @fabsf(float) readonly nounwind willreturn
 
-define <4 x float> @fabs_4x(<4 x float>* %a) {
+define <4 x float> @fabs_4x(ptr %a) {
 ; CHECK-LABEL: @fabs_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @fabs_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
 ; NOACCELERATE-NEXT:    ret <4 x float> [[TMP1]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @fabsf(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -114,21 +114,21 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @llvm.fabs.f32(float) nounwind willreturn
-define <4 x float> @int_fabs_4x(<4 x float>* %a) {
+define <4 x float> @int_fabs_4x(ptr %a) {
 ; CHECK-LABEL: @int_fabs_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @int_fabs_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
 ; NOACCELERATE-NEXT:    ret <4 x float> [[TMP1]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @llvm.fabs.f32(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -144,21 +144,21 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @floorf(float) readonly nounwind willreturn
-define <4 x float> @floor_4x(<4 x float>* %a) {
+define <4 x float> @floor_4x(ptr %a) {
 ; CHECK-LABEL: @floor_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @floor_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]])
 ; NOACCELERATE-NEXT:    ret <4 x float> [[TMP1]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @floorf(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -174,21 +174,21 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @sqrtf(float) readonly nounwind willreturn
-define <4 x float> @sqrt_4x(<4 x float>* %a) {
+define <4 x float> @sqrt_4x(ptr %a) {
 ; CHECK-LABEL: @sqrt_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @sqrt_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
 ; NOACCELERATE-NEXT:    ret <4 x float> [[TMP1]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @sqrtf(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -204,16 +204,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @expf(float) readonly nounwind willreturn
-define <4 x float> @exp_4x(<4 x float>* %a) {
+define <4 x float> @exp_4x(ptr %a) {
 ; CHECK-LABEL: @exp_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vexpf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @exp_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
@@ -230,7 +230,7 @@ define <4 x float> @exp_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @expf(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -246,16 +246,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @expm1f(float) readonly nounwind willreturn
-define <4 x float> @expm1_4x(<4 x float>* %a) {
+define <4 x float> @expm1_4x(ptr %a) {
 ; CHECK-LABEL: @expm1_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vexpm1f(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @expm1_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @expm1f(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
@@ -271,7 +271,7 @@ define <4 x float> @expm1_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @expm1f(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -287,16 +287,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @logf(float) readonly nounwind willreturn
-define <4 x float> @log_4x(<4 x float>* %a) {
+define <4 x float> @log_4x(ptr %a) {
 ; CHECK-LABEL: @log_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vlogf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @log_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
@@ -313,7 +313,7 @@ define <4 x float> @log_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @logf(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -329,16 +329,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @log1pf(float) readonly nounwind willreturn
-define <4 x float> @log1p_4x(<4 x float>* %a) {
+define <4 x float> @log1p_4x(ptr %a) {
 ; CHECK-LABEL: @log1p_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vlog1pf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @log1p_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @log1pf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
@@ -354,7 +354,7 @@ define <4 x float> @log1p_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @log1pf(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -370,10 +370,10 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @log10pf(float) readonly nounwind willreturn
-define <4 x float> @log10p_4x(<4 x float>* %a) {
+define <4 x float> @log10p_4x(ptr %a) {
 ; CHECK-LABEL: @log10p_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @log10pf(float [[VECEXT]])
 ; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
@@ -390,7 +390,7 @@ define <4 x float> @log10p_4x(<4 x float>* %a) {
 ;
 ; NOACCELERATE-LABEL: @log10p_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @log10pf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
@@ -406,7 +406,7 @@ define <4 x float> @log10p_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @log10pf(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -422,16 +422,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @logbf(float) readonly nounwind willreturn
-define <4 x float> @logb_4x(<4 x float>* %a) {
+define <4 x float> @logb_4x(ptr %a) {
 ; CHECK-LABEL: @logb_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vlogbf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @logb_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @logbf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
@@ -447,7 +447,7 @@ define <4 x float> @logb_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @logbf(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -463,16 +463,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @sinf(float) readonly nounwind willreturn
-define <4 x float> @sin_4x(<4 x float>* %a) {
+define <4 x float> @sin_4x(ptr %a) {
 ; CHECK-LABEL: @sin_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vsinf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @sin_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
@@ -489,7 +489,7 @@ define <4 x float> @sin_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @sinf(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -505,16 +505,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @cosf(float) readonly nounwind willreturn
-define <4 x float> @cos_4x(<4 x float>* %a) {
+define <4 x float> @cos_4x(ptr %a) {
 ; CHECK-LABEL: @cos_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vcosf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @cos_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
@@ -531,7 +531,7 @@ define <4 x float> @cos_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @cosf(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -547,16 +547,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @tanf(float) readonly nounwind willreturn
-define <4 x float> @tan_4x(<4 x float>* %a) {
+define <4 x float> @tan_4x(ptr %a) {
 ; CHECK-LABEL: @tan_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vtanf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @tan_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
@@ -572,7 +572,7 @@ define <4 x float> @tan_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @tanf(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -588,16 +588,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @asinf(float) readonly nounwind willreturn
-define <4 x float> @asin_4x(<4 x float>* %a) {
+define <4 x float> @asin_4x(ptr %a) {
 ; CHECK-LABEL: @asin_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vasinf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @asin_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
@@ -613,7 +613,7 @@ define <4 x float> @asin_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @asinf(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -629,16 +629,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @acosf(float) readonly nounwind willreturn
-define <4 x float> @acos_4x(<4 x float>* %a) {
+define <4 x float> @acos_4x(ptr %a) {
 ; CHECK-LABEL: @acos_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vacosf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @acos_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
@@ -654,7 +654,7 @@ define <4 x float> @acos_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @acosf(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -670,16 +670,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @atanf(float) readonly nounwind willreturn
-define <4 x float> @atan_4x(<4 x float>* %a) {
+define <4 x float> @atan_4x(ptr %a) {
 ; CHECK-LABEL: @atan_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vatanf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @atan_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
@@ -695,7 +695,7 @@ define <4 x float> @atan_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @atanf(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -711,16 +711,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @sinhf(float) readonly nounwind willreturn
-define <4 x float> @sinh_4x(<4 x float>* %a) {
+define <4 x float> @sinh_4x(ptr %a) {
 ; CHECK-LABEL: @sinh_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vsinhf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @sinh_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
@@ -736,7 +736,7 @@ define <4 x float> @sinh_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @sinhf(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -752,16 +752,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @coshf(float) readonly nounwind willreturn
-define <4 x float> @cosh_4x(<4 x float>* %a) {
+define <4 x float> @cosh_4x(ptr %a) {
 ; CHECK-LABEL: @cosh_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vcoshf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @cosh_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
@@ -777,7 +777,7 @@ define <4 x float> @cosh_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @coshf(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -793,16 +793,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @tanhf(float) readonly nounwind willreturn
-define <4 x float> @tanh_4x(<4 x float>* %a) {
+define <4 x float> @tanh_4x(ptr %a) {
 ; CHECK-LABEL: @tanh_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vtanhf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @tanh_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
@@ -818,7 +818,7 @@ define <4 x float> @tanh_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @tanhf(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -834,16 +834,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @asinhf(float) readonly nounwind willreturn
-define <4 x float> @asinh_4x(<4 x float>* %a) {
+define <4 x float> @asinh_4x(ptr %a) {
 ; CHECK-LABEL: @asinh_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vasinhf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @asinh_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
@@ -859,7 +859,7 @@ define <4 x float> @asinh_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @asinhf(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -875,16 +875,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @acoshf(float) readonly nounwind willreturn
-define <4 x float> @acosh_4x(<4 x float>* %a) {
+define <4 x float> @acosh_4x(ptr %a) {
 ; CHECK-LABEL: @acosh_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vacoshf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @acosh_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
@@ -900,7 +900,7 @@ define <4 x float> @acosh_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @acoshf(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -916,16 +916,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @atanhf(float) readonly nounwind willreturn
-define <4 x float> @atanh_4x(<4 x float>* %a) {
+define <4 x float> @atanh_4x(ptr %a) {
 ; CHECK-LABEL: @atanh_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vatanhf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @atanh_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
@@ -941,7 +941,7 @@ define <4 x float> @atanh_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @atanhf(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -958,10 +958,10 @@ entry:
 }
 
 ; Accelerate *does not* provide sin() for <2 x float>.
-define <2 x float> @sin_2x(<2 x float>* %a) {
+define <2 x float> @sin_2x(ptr %a) {
 ; CHECK-LABEL: @sin_2x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, <2 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]]) #[[ATTR2:[0-9]+]]
 ; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
@@ -972,7 +972,7 @@ define <2 x float> @sin_2x(<2 x float>* %a) {
 ;
 ; NOACCELERATE-LABEL: @sin_2x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <2 x float>, <2 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
@@ -982,7 +982,7 @@ define <2 x float> @sin_2x(<2 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <2 x float> [[VECINS_1]]
 ;
 entry:
-  %0 = load <2 x float>, <2 x float>* %a, align 16
+  %0 = load <2 x float>, ptr %a, align 16
   %vecext = extractelement <2 x float> %0, i32 0
   %1 = tail call fast float @llvm.sin.f32(float %vecext)
   %vecins = insertelement <2 x float> poison, float %1, i32 0
@@ -996,16 +996,16 @@ entry:
 declare float @llvm.cos.f32(float)
 
 ; Accelerate provides cos() for <4 x float>
-define <4 x float> @int_cos_4x(<4 x float>* %a) {
+define <4 x float> @int_cos_4x(ptr %a) {
 ; CHECK-LABEL: @int_cos_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vcosf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @int_cos_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
@@ -1022,7 +1022,7 @@ define <4 x float> @int_cos_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @llvm.cos.f32(float %vecext)
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -1039,10 +1039,10 @@ entry:
 }
 
 ; Accelerate *does not* provide cos() for <2 x float>.
-define <2 x float> @cos_2x(<2 x float>* %a) {
+define <2 x float> @cos_2x(ptr %a) {
 ; CHECK-LABEL: @cos_2x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, <2 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]]) #[[ATTR3:[0-9]+]]
 ; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
@@ -1053,7 +1053,7 @@ define <2 x float> @cos_2x(<2 x float>* %a) {
 ;
 ; NOACCELERATE-LABEL: @cos_2x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <2 x float>, <2 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
@@ -1063,7 +1063,7 @@ define <2 x float> @cos_2x(<2 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <2 x float> [[VECINS_1]]
 ;
 entry:
-  %0 = load <2 x float>, <2 x float>* %a, align 16
+  %0 = load <2 x float>, ptr %a, align 16
   %vecext = extractelement <2 x float> %0, i32 0
   %1 = tail call fast float @llvm.cos.f32(float %vecext)
   %vecins = insertelement <2 x float> poison, float %1, i32 0

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
index fee85475d0c2b..4df52503e1e2c 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
@@ -8,16 +8,16 @@ target triple = "arm64-apple-ios14.0.0"
 declare float @llvm.sin.f32(float)
 
 ; Accelerate provides sin() for <4 x float>
-define <4 x float> @int_sin_4x(<4 x float>* %a) {
+define <4 x float> @int_sin_4x(ptr %a) {
 ; CHECK-LABEL: @int_sin_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vsinf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @int_sin_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
@@ -34,7 +34,7 @@ define <4 x float> @int_sin_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @llvm.sin.f32(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -52,21 +52,21 @@ entry:
 
 declare float @ceilf(float) readonly nounwind willreturn
 
-define <4 x float> @ceil_4x(<4 x float>* %a) {
+define <4 x float> @ceil_4x(ptr %a) {
 ; CHECK-LABEL: @ceil_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @ceil_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]])
 ; NOACCELERATE-NEXT:    ret <4 x float> [[TMP1]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @ceilf(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -84,21 +84,21 @@ entry:
 
 declare float @fabsf(float) readonly nounwind willreturn
 
-define <4 x float> @fabs_4x(<4 x float>* %a) {
+define <4 x float> @fabs_4x(ptr %a) {
 ; CHECK-LABEL: @fabs_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @fabs_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
 ; NOACCELERATE-NEXT:    ret <4 x float> [[TMP1]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @fabsf(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -114,21 +114,21 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @llvm.fabs.f32(float)
-define <4 x float> @int_fabs_4x(<4 x float>* %a) {
+define <4 x float> @int_fabs_4x(ptr %a) {
 ; CHECK-LABEL: @int_fabs_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @int_fabs_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
 ; NOACCELERATE-NEXT:    ret <4 x float> [[TMP1]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @llvm.fabs.f32(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -144,21 +144,21 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @floorf(float) readonly nounwind willreturn
-define <4 x float> @floor_4x(<4 x float>* %a) {
+define <4 x float> @floor_4x(ptr %a) {
 ; CHECK-LABEL: @floor_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @floor_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]])
 ; NOACCELERATE-NEXT:    ret <4 x float> [[TMP1]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @floorf(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -174,21 +174,21 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @sqrtf(float) readonly nounwind willreturn
-define <4 x float> @sqrt_4x(<4 x float>* %a) {
+define <4 x float> @sqrt_4x(ptr %a) {
 ; CHECK-LABEL: @sqrt_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @sqrt_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
 ; NOACCELERATE-NEXT:    ret <4 x float> [[TMP1]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @sqrtf(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -204,16 +204,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @expf(float) readonly nounwind willreturn
-define <4 x float> @exp_4x(<4 x float>* %a) {
+define <4 x float> @exp_4x(ptr %a) {
 ; CHECK-LABEL: @exp_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vexpf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @exp_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
@@ -230,7 +230,7 @@ define <4 x float> @exp_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @expf(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -246,16 +246,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @expm1f(float) readonly nounwind willreturn
-define <4 x float> @expm1_4x(<4 x float>* %a) {
+define <4 x float> @expm1_4x(ptr %a) {
 ; CHECK-LABEL: @expm1_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vexpm1f(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @expm1_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @expm1f(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
@@ -271,7 +271,7 @@ define <4 x float> @expm1_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @expm1f(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -287,16 +287,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @logf(float) readonly nounwind willreturn
-define <4 x float> @log_4x(<4 x float>* %a) {
+define <4 x float> @log_4x(ptr %a) {
 ; CHECK-LABEL: @log_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vlogf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @log_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
@@ -313,7 +313,7 @@ define <4 x float> @log_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @logf(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -329,16 +329,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @log1pf(float) readonly nounwind willreturn
-define <4 x float> @log1p_4x(<4 x float>* %a) {
+define <4 x float> @log1p_4x(ptr %a) {
 ; CHECK-LABEL: @log1p_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vlog1pf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @log1p_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @log1pf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
@@ -354,7 +354,7 @@ define <4 x float> @log1p_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @log1pf(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -370,10 +370,10 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @log10pf(float) readonly nounwind willreturn
-define <4 x float> @log10p_4x(<4 x float>* %a) {
+define <4 x float> @log10p_4x(ptr %a) {
 ; CHECK-LABEL: @log10p_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @log10pf(float [[VECEXT]])
 ; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
@@ -390,7 +390,7 @@ define <4 x float> @log10p_4x(<4 x float>* %a) {
 ;
 ; NOACCELERATE-LABEL: @log10p_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @log10pf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
@@ -406,7 +406,7 @@ define <4 x float> @log10p_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @log10pf(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -422,16 +422,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @logbf(float) readonly nounwind willreturn
-define <4 x float> @logb_4x(<4 x float>* %a) {
+define <4 x float> @logb_4x(ptr %a) {
 ; CHECK-LABEL: @logb_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vlogbf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @logb_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @logbf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
@@ -447,7 +447,7 @@ define <4 x float> @logb_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @logbf(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -463,16 +463,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @sinf(float) readonly nounwind willreturn
-define <4 x float> @sin_4x(<4 x float>* %a) {
+define <4 x float> @sin_4x(ptr %a) {
 ; CHECK-LABEL: @sin_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vsinf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @sin_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
@@ -489,7 +489,7 @@ define <4 x float> @sin_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @sinf(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -505,16 +505,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @cosf(float) readonly nounwind willreturn
-define <4 x float> @cos_4x(<4 x float>* %a) {
+define <4 x float> @cos_4x(ptr %a) {
 ; CHECK-LABEL: @cos_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vcosf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @cos_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
@@ -531,7 +531,7 @@ define <4 x float> @cos_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @cosf(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -547,16 +547,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @tanf(float) readonly nounwind willreturn
-define <4 x float> @tan_4x(<4 x float>* %a) {
+define <4 x float> @tan_4x(ptr %a) {
 ; CHECK-LABEL: @tan_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vtanf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @tan_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
@@ -572,7 +572,7 @@ define <4 x float> @tan_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @tanf(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -588,16 +588,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @asinf(float) readonly nounwind willreturn
-define <4 x float> @asin_4x(<4 x float>* %a) {
+define <4 x float> @asin_4x(ptr %a) {
 ; CHECK-LABEL: @asin_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vasinf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @asin_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
@@ -613,7 +613,7 @@ define <4 x float> @asin_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @asinf(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -629,16 +629,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @acosf(float) readonly nounwind willreturn
-define <4 x float> @acos_4x(<4 x float>* %a) {
+define <4 x float> @acos_4x(ptr %a) {
 ; CHECK-LABEL: @acos_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vacosf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @acos_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
@@ -654,7 +654,7 @@ define <4 x float> @acos_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @acosf(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -670,16 +670,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @atanf(float) readonly nounwind willreturn
-define <4 x float> @atan_4x(<4 x float>* %a) {
+define <4 x float> @atan_4x(ptr %a) {
 ; CHECK-LABEL: @atan_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vatanf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @atan_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
@@ -695,7 +695,7 @@ define <4 x float> @atan_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @atanf(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -711,16 +711,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @sinhf(float) readonly nounwind willreturn
-define <4 x float> @sinh_4x(<4 x float>* %a) {
+define <4 x float> @sinh_4x(ptr %a) {
 ; CHECK-LABEL: @sinh_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vsinhf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @sinh_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
@@ -736,7 +736,7 @@ define <4 x float> @sinh_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @sinhf(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -752,16 +752,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @coshf(float) readonly nounwind willreturn
-define <4 x float> @cosh_4x(<4 x float>* %a) {
+define <4 x float> @cosh_4x(ptr %a) {
 ; CHECK-LABEL: @cosh_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vcoshf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @cosh_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
@@ -777,7 +777,7 @@ define <4 x float> @cosh_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @coshf(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -793,16 +793,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @tanhf(float) readonly nounwind willreturn
-define <4 x float> @tanh_4x(<4 x float>* %a) {
+define <4 x float> @tanh_4x(ptr %a) {
 ; CHECK-LABEL: @tanh_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vtanhf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @tanh_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
@@ -818,7 +818,7 @@ define <4 x float> @tanh_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @tanhf(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -834,16 +834,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @asinhf(float) readonly nounwind willreturn
-define <4 x float> @asinh_4x(<4 x float>* %a) {
+define <4 x float> @asinh_4x(ptr %a) {
 ; CHECK-LABEL: @asinh_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vasinhf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @asinh_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
@@ -859,7 +859,7 @@ define <4 x float> @asinh_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @asinhf(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -875,16 +875,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @acoshf(float) readonly nounwind willreturn
-define <4 x float> @acosh_4x(<4 x float>* %a) {
+define <4 x float> @acosh_4x(ptr %a) {
 ; CHECK-LABEL: @acosh_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vacoshf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @acosh_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
@@ -900,7 +900,7 @@ define <4 x float> @acosh_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @acoshf(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -916,16 +916,16 @@ entry:
   ret <4 x float> %vecins.3
 }
 declare float @atanhf(float) readonly nounwind willreturn
-define <4 x float> @atanh_4x(<4 x float>* %a) {
+define <4 x float> @atanh_4x(ptr %a) {
 ; CHECK-LABEL: @atanh_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vatanhf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @atanh_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
@@ -941,7 +941,7 @@ define <4 x float> @atanh_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @atanhf(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -958,10 +958,10 @@ entry:
 }
 
 ; Accelerate *does not* provide sin() for <2 x float>.
-define <2 x float> @sin_2x(<2 x float>* %a) {
+define <2 x float> @sin_2x(ptr %a) {
 ; CHECK-LABEL: @sin_2x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, <2 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]]) #[[ATTR2:[0-9]+]]
 ; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
@@ -972,7 +972,7 @@ define <2 x float> @sin_2x(<2 x float>* %a) {
 ;
 ; NOACCELERATE-LABEL: @sin_2x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <2 x float>, <2 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
@@ -982,7 +982,7 @@ define <2 x float> @sin_2x(<2 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <2 x float> [[VECINS_1]]
 ;
 entry:
-  %0 = load <2 x float>, <2 x float>* %a, align 16
+  %0 = load <2 x float>, ptr %a, align 16
   %vecext = extractelement <2 x float> %0, i32 0
   %1 = tail call fast float @llvm.sin.f32(float %vecext)
   %vecins = insertelement <2 x float> undef, float %1, i32 0
@@ -996,16 +996,16 @@ entry:
 declare float @llvm.cos.f32(float)
 
 ; Accelerate provides cos() for <4 x float>
-define <4 x float> @int_cos_4x(<4 x float>* %a) {
+define <4 x float> @int_cos_4x(ptr %a) {
 ; CHECK-LABEL: @int_cos_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vcosf(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 ; NOACCELERATE-LABEL: @int_cos_4x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
@@ -1022,7 +1022,7 @@ define <4 x float> @int_cos_4x(<4 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @llvm.cos.f32(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -1039,10 +1039,10 @@ entry:
 }
 
 ; Accelerate *does not* provide cos() for <2 x float>.
-define <2 x float> @cos_2x(<2 x float>* %a) {
+define <2 x float> @cos_2x(ptr %a) {
 ; CHECK-LABEL: @cos_2x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, <2 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]]) #[[ATTR3:[0-9]+]]
 ; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
@@ -1053,7 +1053,7 @@ define <2 x float> @cos_2x(<2 x float>* %a) {
 ;
 ; NOACCELERATE-LABEL: @cos_2x(
 ; NOACCELERATE-NEXT:  entry:
-; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <2 x float>, <2 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
 ; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
@@ -1063,7 +1063,7 @@ define <2 x float> @cos_2x(<2 x float>* %a) {
 ; NOACCELERATE-NEXT:    ret <2 x float> [[VECINS_1]]
 ;
 entry:
-  %0 = load <2 x float>, <2 x float>* %a, align 16
+  %0 = load <2 x float>, ptr %a, align 16
   %vecext = extractelement <2 x float> %0, i32 0
   %1 = tail call fast float @llvm.cos.f32(float %vecext)
   %vecins = insertelement <2 x float> undef, float %1, i32 0

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/commute.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/commute.ll
index d5ac0eaf80cd1..19b6d82818532 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/commute.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/commute.ll
@@ -5,7 +5,7 @@ target triple = "aarch64--linux-gnu"
 
 %structA = type { [2 x float] }
 
-define void @test1(%structA* nocapture readonly %J, i32 %xmin, i32 %ymin) {
+define void @test1(ptr nocapture readonly %J, i32 %xmin, i32 %ymin) {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[XMIN:%.*]], i32 0
@@ -13,9 +13,7 @@ define void @test1(%structA* nocapture readonly %J, i32 %xmin, i32 %ymin) {
 ; CHECK-NEXT:    br label [[FOR_BODY3_LR_PH:%.*]]
 ; CHECK:       for.body3.lr.ph:
 ; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [[STRUCTA:%.*]], %structA* [[J:%.*]], i64 0, i32 0, i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[ARRAYIDX4]] to <2 x float>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, ptr [[J:%.*]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = fsub fast <2 x float> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast <2 x float> [[TMP5]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0
@@ -33,11 +31,10 @@ entry:
 for.body3.lr.ph:
   %conv5 = sitofp i32 %ymin to float
   %conv = sitofp i32 %xmin to float
-  %arrayidx4 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 0
-  %0 = load float, float* %arrayidx4, align 4
+  %0 = load float, ptr %J, align 4
   %sub = fsub fast float %conv, %0
-  %arrayidx9 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 1
-  %1 = load float, float* %arrayidx9, align 4
+  %arrayidx9 = getelementptr inbounds %structA, ptr %J, i64 0, i32 0, i64 1
+  %1 = load float, ptr %arrayidx9, align 4
   %sub10 = fsub fast float %conv5, %1
   %mul11 = fmul fast float %sub, %sub
   %mul12 = fmul fast float %sub10, %sub10
@@ -49,7 +46,7 @@ for.end27:
   ret void
 }
 
-define void @test2(%structA* nocapture readonly %J, i32 %xmin, i32 %ymin) {
+define void @test2(ptr nocapture readonly %J, i32 %xmin, i32 %ymin) {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[XMIN:%.*]], i32 0
@@ -57,9 +54,7 @@ define void @test2(%structA* nocapture readonly %J, i32 %xmin, i32 %ymin) {
 ; CHECK-NEXT:    br label [[FOR_BODY3_LR_PH:%.*]]
 ; CHECK:       for.body3.lr.ph:
 ; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [[STRUCTA:%.*]], %structA* [[J:%.*]], i64 0, i32 0, i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[ARRAYIDX4]] to <2 x float>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, ptr [[J:%.*]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = fsub fast <2 x float> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast <2 x float> [[TMP5]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0
@@ -77,11 +72,10 @@ entry:
 for.body3.lr.ph:
   %conv5 = sitofp i32 %ymin to float
   %conv = sitofp i32 %xmin to float
-  %arrayidx4 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 0
-  %0 = load float, float* %arrayidx4, align 4
+  %0 = load float, ptr %J, align 4
   %sub = fsub fast float %conv, %0
-  %arrayidx9 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 1
-  %1 = load float, float* %arrayidx9, align 4
+  %arrayidx9 = getelementptr inbounds %structA, ptr %J, i64 0, i32 0, i64 1
+  %1 = load float, ptr %arrayidx9, align 4
   %sub10 = fsub fast float %conv5, %1
   %mul11 = fmul fast float %sub, %sub
   %mul12 = fmul fast float %sub10, %sub10

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
index b6c35d322a974..e3ad918498ee7 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
@@ -5,7 +5,7 @@ target datalayout = "e-m:e-i32:64-i128:128-n32:64-S128"
 
 declare void @foo(i64, i64, i64, i64)
 
-define void @test1(<4 x i16> %a, <4 x i16> %b, i64* %p) {
+define void @test1(<4 x i16> %a, <4 x i16> %b, ptr %p) {
 ; Make sure types of sub and its sources are not extended.
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
@@ -14,17 +14,17 @@ define void @test1(<4 x i16> %a, <4 x i16> %b, i64* %p) {
 ; CHECK-NEXT:    [[SUB0:%.*]] = sub <4 x i32> [[Z0]], [[Z1]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = sext <4 x i32> [[SUB0]] to <4 x i64>
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0
-; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 [[TMP1]]
-; CHECK-NEXT:    [[LOAD0:%.*]] = load i64, i64* [[GEP0]], align 4
+; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[LOAD0:%.*]] = load i64, ptr [[GEP0]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i64> [[TMP0]], i32 1
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 [[TMP2]]
-; CHECK-NEXT:    [[LOAD1:%.*]] = load i64, i64* [[GEP1]], align 4
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i64, ptr [[GEP1]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i64> [[TMP0]], i32 2
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 [[TMP3]]
-; CHECK-NEXT:    [[LOAD2:%.*]] = load i64, i64* [[GEP2]], align 4
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP3]]
+; CHECK-NEXT:    [[LOAD2:%.*]] = load i64, ptr [[GEP2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP0]], i32 3
-; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 [[TMP4]]
-; CHECK-NEXT:    [[LOAD3:%.*]] = load i64, i64* [[GEP3]], align 4
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP4]]
+; CHECK-NEXT:    [[LOAD3:%.*]] = load i64, ptr [[GEP3]], align 4
 ; CHECK-NEXT:    call void @foo(i64 [[LOAD0]], i64 [[LOAD1]], i64 [[LOAD2]], i64 [[LOAD3]])
 ; CHECK-NEXT:    ret void
 ;
@@ -34,25 +34,25 @@ entry:
   %sub0 = sub <4 x i32> %z0, %z1
   %e0 = extractelement <4 x i32> %sub0, i32 0
   %s0 = sext i32 %e0 to i64
-  %gep0 = getelementptr inbounds i64, i64* %p, i64 %s0
-  %load0 = load i64, i64* %gep0
+  %gep0 = getelementptr inbounds i64, ptr %p, i64 %s0
+  %load0 = load i64, ptr %gep0
   %e1 = extractelement <4 x i32> %sub0, i32 1
   %s1 = sext i32 %e1 to i64
-  %gep1 = getelementptr inbounds i64, i64* %p, i64 %s1
-  %load1 = load i64, i64* %gep1
+  %gep1 = getelementptr inbounds i64, ptr %p, i64 %s1
+  %load1 = load i64, ptr %gep1
   %e2 = extractelement <4 x i32> %sub0, i32 2
   %s2 = sext i32 %e2 to i64
-  %gep2 = getelementptr inbounds i64, i64* %p, i64 %s2
-  %load2 = load i64, i64* %gep2
+  %gep2 = getelementptr inbounds i64, ptr %p, i64 %s2
+  %load2 = load i64, ptr %gep2
   %e3 = extractelement <4 x i32> %sub0, i32 3
   %s3 = sext i32 %e3 to i64
-  %gep3 = getelementptr inbounds i64, i64* %p, i64 %s3
-  %load3 = load i64, i64* %gep3
+  %gep3 = getelementptr inbounds i64, ptr %p, i64 %s3
+  %load3 = load i64, ptr %gep3
   call void @foo(i64 %load0, i64 %load1, i64 %load2, i64 %load3)
   ret void
 }
 
-define void @test2(<4 x i16> %a, <4 x i16> %b, i64 %c0, i64 %c1, i64 %c2, i64 %c3, i64* %p) {
+define void @test2(<4 x i16> %a, <4 x i16> %b, i64 %c0, i64 %c1, i64 %c2, i64 %c3, ptr %p) {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[Z0:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32>
@@ -65,17 +65,17 @@ define void @test2(<4 x i16> %a, <4 x i16> %b, i64 %c0, i64 %c1, i64 %c2, i64 %c
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i64> [[TMP3]], i64 [[C3:%.*]], i32 3
 ; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i64> [[TMP0]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
-; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 [[TMP6]]
-; CHECK-NEXT:    [[LOAD0:%.*]] = load i64, i64* [[GEP0]], align 4
+; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP6]]
+; CHECK-NEXT:    [[LOAD0:%.*]] = load i64, ptr [[GEP0]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 [[TMP7]]
-; CHECK-NEXT:    [[LOAD1:%.*]] = load i64, i64* [[GEP1]], align 4
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP7]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i64, ptr [[GEP1]], align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 [[TMP8]]
-; CHECK-NEXT:    [[LOAD2:%.*]] = load i64, i64* [[GEP2]], align 4
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP8]]
+; CHECK-NEXT:    [[LOAD2:%.*]] = load i64, ptr [[GEP2]], align 4
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
-; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 [[TMP9]]
-; CHECK-NEXT:    [[LOAD3:%.*]] = load i64, i64* [[GEP3]], align 4
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP9]]
+; CHECK-NEXT:    [[LOAD3:%.*]] = load i64, ptr [[GEP3]], align 4
 ; CHECK-NEXT:    call void @foo(i64 [[LOAD0]], i64 [[LOAD1]], i64 [[LOAD2]], i64 [[LOAD3]])
 ; CHECK-NEXT:    ret void
 ;
@@ -86,23 +86,23 @@ entry:
   %e0 = extractelement <4 x i32> %sub0, i32 0
   %s0 = sext i32 %e0 to i64
   %a0 = add i64 %s0, %c0
-  %gep0 = getelementptr inbounds i64, i64* %p, i64 %a0
-  %load0 = load i64, i64* %gep0
+  %gep0 = getelementptr inbounds i64, ptr %p, i64 %a0
+  %load0 = load i64, ptr %gep0
   %e1 = extractelement <4 x i32> %sub0, i32 1
   %s1 = sext i32 %e1 to i64
   %a1 = add i64 %s1, %c1
-  %gep1 = getelementptr inbounds i64, i64* %p, i64 %a1
-  %load1 = load i64, i64* %gep1
+  %gep1 = getelementptr inbounds i64, ptr %p, i64 %a1
+  %load1 = load i64, ptr %gep1
   %e2 = extractelement <4 x i32> %sub0, i32 2
   %s2 = sext i32 %e2 to i64
   %a2 = add i64 %s2, %c2
-  %gep2 = getelementptr inbounds i64, i64* %p, i64 %a2
-  %load2 = load i64, i64* %gep2
+  %gep2 = getelementptr inbounds i64, ptr %p, i64 %a2
+  %load2 = load i64, ptr %gep2
   %e3 = extractelement <4 x i32> %sub0, i32 3
   %s3 = sext i32 %e3 to i64
   %a3 = add i64 %s3, %c3
-  %gep3 = getelementptr inbounds i64, i64* %p, i64 %a3
-  %load3 = load i64, i64* %gep3
+  %gep3 = getelementptr inbounds i64, ptr %p, i64 %a3
+  %load3 = load i64, ptr %gep3
   call void @foo(i64 %load0, i64 %load1, i64 %load2, i64 %load3)
   ret void
 }

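The ext-trunc.ll changes above are the purely mechanical part of the
migration: every typed pointer type such as i64* becomes the single opaque
type ptr, while getelementptr and load keep naming the value type explicitly
as their first operand. A self-contained sketch of the before/after
(illustrative function name, not taken from the commit):

define i64 @load_at(ptr %p, i64 %idx) {
  ; typed pointers:
  ;   %gep = getelementptr inbounds i64, i64* %p, i64 %idx
  ;   %v   = load i64, i64* %gep
  ; opaque pointers: the pointee type lives only on the operations
  %gep = getelementptr inbounds i64, ptr %p, i64 %idx
  %v = load i64, ptr %gep
  ret i64 %v
}
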
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
index c53b263d7e2e0..9efad068b8ec0 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
@@ -55,53 +55,53 @@ define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) {
 }
 
 @data = global [6 x [258 x i8]] zeroinitializer, align 1
-define void @gather_load(i16* noalias %ptr) {
+define void @gather_load(ptr noalias %ptr) {
 ; CHECK-LABEL: @gather_load(
-; CHECK-NEXT:    [[ARRAYIDX182:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 1
-; CHECK-NEXT:    [[ARRAYIDX183:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 2
-; CHECK-NEXT:    [[ARRAYIDX184:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 3
-; CHECK-NEXT:    [[ARRAYIDX185:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 4
-; CHECK-NEXT:    [[L0:%.*]] = load i8, i8* getelementptr inbounds ([6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 1, i64 0), align 1
+; CHECK-NEXT:    [[ARRAYIDX182:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX183:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX184:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i64 3
+; CHECK-NEXT:    [[ARRAYIDX185:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i64 4
+; CHECK-NEXT:    [[L0:%.*]] = load i8, ptr getelementptr inbounds ([6 x [258 x i8]], ptr @data, i64 0, i64 1, i64 0), align 1
 ; CHECK-NEXT:    [[CONV150:%.*]] = zext i8 [[L0]] to i16
 ; CHECK-NEXT:    [[ADD152:%.*]] = add nuw nsw i16 [[CONV150]], 10
-; CHECK-NEXT:    [[L1:%.*]] = load i8, i8* getelementptr inbounds ([6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 2, i64 1), align 1
+; CHECK-NEXT:    [[L1:%.*]] = load i8, ptr getelementptr inbounds ([6 x [258 x i8]], ptr @data, i64 0, i64 2, i64 1), align 1
 ; CHECK-NEXT:    [[CONV156:%.*]] = zext i8 [[L1]] to i16
 ; CHECK-NEXT:    [[ADD158:%.*]] = add nuw nsw i16 [[CONV156]], 20
-; CHECK-NEXT:    [[L2:%.*]] = load i8, i8* getelementptr inbounds ([6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 3, i64 2), align 1
+; CHECK-NEXT:    [[L2:%.*]] = load i8, ptr getelementptr inbounds ([6 x [258 x i8]], ptr @data, i64 0, i64 3, i64 2), align 1
 ; CHECK-NEXT:    [[CONV162:%.*]] = zext i8 [[L2]] to i16
 ; CHECK-NEXT:    [[ADD164:%.*]] = add nuw nsw i16 [[CONV162]], 30
-; CHECK-NEXT:    [[L3:%.*]] = load i8, i8* getelementptr inbounds ([6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 4, i64 3), align 1
+; CHECK-NEXT:    [[L3:%.*]] = load i8, ptr getelementptr inbounds ([6 x [258 x i8]], ptr @data, i64 0, i64 4, i64 3), align 1
 ; CHECK-NEXT:    [[CONV168:%.*]] = zext i8 [[L3]] to i16
 ; CHECK-NEXT:    [[ADD170:%.*]] = add nuw nsw i16 [[CONV168]], 40
-; CHECK-NEXT:    store i16 [[ADD152]], i16* [[ARRAYIDX182]], align 2
-; CHECK-NEXT:    store i16 [[ADD158]], i16* [[ARRAYIDX183]], align 2
-; CHECK-NEXT:    store i16 [[ADD164]], i16* [[ARRAYIDX184]], align 2
-; CHECK-NEXT:    store i16 [[ADD170]], i16* [[ARRAYIDX185]], align 2
+; CHECK-NEXT:    store i16 [[ADD152]], ptr [[ARRAYIDX182]], align 2
+; CHECK-NEXT:    store i16 [[ADD158]], ptr [[ARRAYIDX183]], align 2
+; CHECK-NEXT:    store i16 [[ADD164]], ptr [[ARRAYIDX184]], align 2
+; CHECK-NEXT:    store i16 [[ADD170]], ptr [[ARRAYIDX185]], align 2
 ; CHECK-NEXT:    ret void
 ;
-  %arrayidx182 = getelementptr inbounds i16, i16* %ptr, i64 1
-  %arrayidx183 = getelementptr inbounds i16, i16* %ptr, i64 2
-  %arrayidx184 = getelementptr inbounds i16, i16* %ptr, i64 3
-  %arrayidx185 = getelementptr inbounds i16, i16* %ptr, i64 4
-  %arrayidx149 = getelementptr inbounds [6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 1, i64 0
-  %l0 = load i8, i8* %arrayidx149, align 1
+  %arrayidx182 = getelementptr inbounds i16, ptr %ptr, i64 1
+  %arrayidx183 = getelementptr inbounds i16, ptr %ptr, i64 2
+  %arrayidx184 = getelementptr inbounds i16, ptr %ptr, i64 3
+  %arrayidx185 = getelementptr inbounds i16, ptr %ptr, i64 4
+  %arrayidx149 = getelementptr inbounds [6 x [258 x i8]], ptr @data, i64 0, i64 1, i64 0
+  %l0 = load i8, ptr %arrayidx149, align 1
   %conv150 = zext i8 %l0 to i16
   %add152 = add i16 10, %conv150
-  %arrayidx155 = getelementptr inbounds [6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 2, i64 1
-  %l1 = load i8, i8* %arrayidx155, align 1
+  %arrayidx155 = getelementptr inbounds [6 x [258 x i8]], ptr @data, i64 0, i64 2, i64 1
+  %l1 = load i8, ptr %arrayidx155, align 1
   %conv156 = zext i8 %l1 to i16
   %add158 = add i16 20, %conv156
-  %arrayidx161 = getelementptr inbounds [6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 3, i64 2
-  %l2 = load i8, i8* %arrayidx161, align 1
+  %arrayidx161 = getelementptr inbounds [6 x [258 x i8]], ptr @data, i64 0, i64 3, i64 2
+  %l2 = load i8, ptr %arrayidx161, align 1
   %conv162 = zext i8 %l2 to i16
   %add164 = add i16 30, %conv162
-  %arrayidx167 = getelementptr inbounds [6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 4, i64 3
-  %l3 = load i8, i8* %arrayidx167, align 1
+  %arrayidx167 = getelementptr inbounds [6 x [258 x i8]], ptr @data, i64 0, i64 4, i64 3
+  %l3 = load i8, ptr %arrayidx167, align 1
   %conv168 = zext i8 %l3 to i16
   %add170 = add i16 40, %conv168
-  store i16 %add152, i16* %arrayidx182, align 2
-  store i16 %add158, i16* %arrayidx183, align 2
-  store i16 %add164, i16* %arrayidx184, align 2
-  store i16 %add170, i16* %arrayidx185, align 2
+  store i16 %add152, ptr %arrayidx182, align 2
+  store i16 %add158, ptr %arrayidx183, align 2
+  store i16 %add164, ptr %arrayidx184, align 2
+  store i16 %add170, ptr %arrayidx185, align 2
   ret void
 }

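Note in the gather-cost.ll hunks that a constant GEP expression keeps its
explicit source element type ([6 x [258 x i8]]) even though its pointer
operand is now just ptr @data; only the pointee type on the pointer itself
goes away. Sketched as a standalone module whose global matches the test's
@data shape:

@data = global [6 x [258 x i8]] zeroinitializer, align 1

define i8 @row1_col0() {
  ; the GEP's first operand still names the type being indexed:
  %v = load i8, ptr getelementptr inbounds ([6 x [258 x i8]], ptr @data, i64 0, i64 1, i64 0), align 1
  ret i8 %v
}
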
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-load-min-required-vf-2.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-load-min-required-vf-2.ll
index 8d26ad1628cae..1a16d258a3c8e 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-load-min-required-vf-2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-load-min-required-vf-2.ll
@@ -4,10 +4,10 @@
 define void @foo() local_unnamed_addr {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load volatile double, double* poison, align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = load volatile double, double* poison, align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = load volatile double, double* poison, align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = load volatile double, double* poison, align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load volatile double, ptr poison, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load volatile double, ptr poison, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load volatile double, ptr poison, align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load volatile double, ptr poison, align 8
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[D30_0734:%.*]] = phi double [ undef, [[FOR_BODY]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
@@ -17,10 +17,10 @@ define void @foo() local_unnamed_addr {
 ; CHECK-NEXT:    br label [[FOR_BODY]]
 ;
 entry:
-  %0 = load volatile double, double* poison, align 8
-  %1 = load volatile double, double* poison, align 8
-  %2 = load volatile double, double* poison, align 8
-  %3 = load volatile double, double* poison, align 8
+  %0 = load volatile double, ptr poison, align 8
+  %1 = load volatile double, ptr poison, align 8
+  %2 = load volatile double, ptr poison, align 8
+  %3 = load volatile double, ptr poison, align 8
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll
index 7b897fab08d34..de9859df3aad9 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll
@@ -20,7 +20,7 @@ target triple = "aarch64--linux-gnu"
 ;   return sum;
 ; }
 
-define i32 @gather_reduce_8x16_i32(i16* nocapture readonly %a, i16* nocapture readonly %b, i16* nocapture readonly %g, i32 %n) {
+define i32 @gather_reduce_8x16_i32(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture readonly %g, i32 %n) {
 ; GENERIC-LABEL: @gather_reduce_8x16_i32(
 ; GENERIC-NEXT:  entry:
 ; GENERIC-NEXT:    [[CMP_99:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -35,61 +35,59 @@ define i32 @gather_reduce_8x16_i32(i16* nocapture readonly %a, i16* nocapture re
 ; GENERIC:       for.body:
 ; GENERIC-NEXT:    [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
 ; GENERIC-NEXT:    [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
-; GENERIC-NEXT:    [[A_ADDR_0101:%.*]] = phi i16* [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
-; GENERIC-NEXT:    [[INCDEC_PTR58]] = getelementptr inbounds i16, i16* [[A_ADDR_0101]], i64 8
-; GENERIC-NEXT:    [[TMP0:%.*]] = bitcast i16* [[A_ADDR_0101]] to <8 x i16>*
-; GENERIC-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
+; GENERIC-NEXT:    [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
+; GENERIC-NEXT:    [[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8
+; GENERIC-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2
 ; GENERIC-NEXT:    [[TMP2:%.*]] = zext <8 x i16> [[TMP1]] to <8 x i32>
-; GENERIC-NEXT:    [[TMP3:%.*]] = bitcast i16* [[B:%.*]] to <8 x i16>*
-; GENERIC-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 2
+; GENERIC-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2
 ; GENERIC-NEXT:    [[TMP5:%.*]] = zext <8 x i16> [[TMP4]] to <8 x i32>
 ; GENERIC-NEXT:    [[TMP6:%.*]] = sub nsw <8 x i32> [[TMP2]], [[TMP5]]
 ; GENERIC-NEXT:    [[TMP7:%.*]] = extractelement <8 x i32> [[TMP6]], i64 0
 ; GENERIC-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
-; GENERIC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[G:%.*]], i64 [[TMP8]]
-; GENERIC-NEXT:    [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX]], align 2
+; GENERIC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[TMP8]]
+; GENERIC-NEXT:    [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
 ; GENERIC-NEXT:    [[CONV3:%.*]] = zext i16 [[TMP9]] to i32
 ; GENERIC-NEXT:    [[ADD:%.*]] = add nsw i32 [[SUM_0102]], [[CONV3]]
 ; GENERIC-NEXT:    [[TMP10:%.*]] = extractelement <8 x i32> [[TMP6]], i64 1
 ; GENERIC-NEXT:    [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
-; GENERIC-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP11]]
-; GENERIC-NEXT:    [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
+; GENERIC-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP11]]
+; GENERIC-NEXT:    [[TMP12:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
 ; GENERIC-NEXT:    [[CONV11:%.*]] = zext i16 [[TMP12]] to i32
 ; GENERIC-NEXT:    [[ADD12:%.*]] = add nsw i32 [[ADD]], [[CONV11]]
 ; GENERIC-NEXT:    [[TMP13:%.*]] = extractelement <8 x i32> [[TMP6]], i64 2
 ; GENERIC-NEXT:    [[TMP14:%.*]] = sext i32 [[TMP13]] to i64
-; GENERIC-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP14]]
-; GENERIC-NEXT:    [[TMP15:%.*]] = load i16, i16* [[ARRAYIDX19]], align 2
+; GENERIC-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP14]]
+; GENERIC-NEXT:    [[TMP15:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2
 ; GENERIC-NEXT:    [[CONV20:%.*]] = zext i16 [[TMP15]] to i32
 ; GENERIC-NEXT:    [[ADD21:%.*]] = add nsw i32 [[ADD12]], [[CONV20]]
 ; GENERIC-NEXT:    [[TMP16:%.*]] = extractelement <8 x i32> [[TMP6]], i64 3
 ; GENERIC-NEXT:    [[TMP17:%.*]] = sext i32 [[TMP16]] to i64
-; GENERIC-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP17]]
-; GENERIC-NEXT:    [[TMP18:%.*]] = load i16, i16* [[ARRAYIDX28]], align 2
+; GENERIC-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP17]]
+; GENERIC-NEXT:    [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX28]], align 2
 ; GENERIC-NEXT:    [[CONV29:%.*]] = zext i16 [[TMP18]] to i32
 ; GENERIC-NEXT:    [[ADD30:%.*]] = add nsw i32 [[ADD21]], [[CONV29]]
 ; GENERIC-NEXT:    [[TMP19:%.*]] = extractelement <8 x i32> [[TMP6]], i64 4
 ; GENERIC-NEXT:    [[TMP20:%.*]] = sext i32 [[TMP19]] to i64
-; GENERIC-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP20]]
-; GENERIC-NEXT:    [[TMP21:%.*]] = load i16, i16* [[ARRAYIDX37]], align 2
+; GENERIC-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP20]]
+; GENERIC-NEXT:    [[TMP21:%.*]] = load i16, ptr [[ARRAYIDX37]], align 2
 ; GENERIC-NEXT:    [[CONV38:%.*]] = zext i16 [[TMP21]] to i32
 ; GENERIC-NEXT:    [[ADD39:%.*]] = add nsw i32 [[ADD30]], [[CONV38]]
 ; GENERIC-NEXT:    [[TMP22:%.*]] = extractelement <8 x i32> [[TMP6]], i64 5
 ; GENERIC-NEXT:    [[TMP23:%.*]] = sext i32 [[TMP22]] to i64
-; GENERIC-NEXT:    [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP23]]
-; GENERIC-NEXT:    [[TMP24:%.*]] = load i16, i16* [[ARRAYIDX46]], align 2
+; GENERIC-NEXT:    [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP23]]
+; GENERIC-NEXT:    [[TMP24:%.*]] = load i16, ptr [[ARRAYIDX46]], align 2
 ; GENERIC-NEXT:    [[CONV47:%.*]] = zext i16 [[TMP24]] to i32
 ; GENERIC-NEXT:    [[ADD48:%.*]] = add nsw i32 [[ADD39]], [[CONV47]]
 ; GENERIC-NEXT:    [[TMP25:%.*]] = extractelement <8 x i32> [[TMP6]], i64 6
 ; GENERIC-NEXT:    [[TMP26:%.*]] = sext i32 [[TMP25]] to i64
-; GENERIC-NEXT:    [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP26]]
-; GENERIC-NEXT:    [[TMP27:%.*]] = load i16, i16* [[ARRAYIDX55]], align 2
+; GENERIC-NEXT:    [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP26]]
+; GENERIC-NEXT:    [[TMP27:%.*]] = load i16, ptr [[ARRAYIDX55]], align 2
 ; GENERIC-NEXT:    [[CONV56:%.*]] = zext i16 [[TMP27]] to i32
 ; GENERIC-NEXT:    [[ADD57:%.*]] = add nsw i32 [[ADD48]], [[CONV56]]
 ; GENERIC-NEXT:    [[TMP28:%.*]] = extractelement <8 x i32> [[TMP6]], i64 7
 ; GENERIC-NEXT:    [[TMP29:%.*]] = sext i32 [[TMP28]] to i64
-; GENERIC-NEXT:    [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP29]]
-; GENERIC-NEXT:    [[TMP30:%.*]] = load i16, i16* [[ARRAYIDX64]], align 2
+; GENERIC-NEXT:    [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP29]]
+; GENERIC-NEXT:    [[TMP30:%.*]] = load i16, ptr [[ARRAYIDX64]], align 2
 ; GENERIC-NEXT:    [[CONV65:%.*]] = zext i16 [[TMP30]] to i32
 ; GENERIC-NEXT:    [[ADD66]] = add nsw i32 [[ADD57]], [[CONV65]]
 ; GENERIC-NEXT:    [[INC]] = add nuw nsw i32 [[I_0103]], 1
@@ -110,61 +108,59 @@ define i32 @gather_reduce_8x16_i32(i16* nocapture readonly %a, i16* nocapture re
 ; KRYO:       for.body:
 ; KRYO-NEXT:    [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
 ; KRYO-NEXT:    [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
-; KRYO-NEXT:    [[A_ADDR_0101:%.*]] = phi i16* [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
-; KRYO-NEXT:    [[INCDEC_PTR58]] = getelementptr inbounds i16, i16* [[A_ADDR_0101]], i64 8
-; KRYO-NEXT:    [[TMP0:%.*]] = bitcast i16* [[A_ADDR_0101]] to <8 x i16>*
-; KRYO-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
+; KRYO-NEXT:    [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
+; KRYO-NEXT:    [[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8
+; KRYO-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2
 ; KRYO-NEXT:    [[TMP2:%.*]] = zext <8 x i16> [[TMP1]] to <8 x i32>
-; KRYO-NEXT:    [[TMP3:%.*]] = bitcast i16* [[B:%.*]] to <8 x i16>*
-; KRYO-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 2
+; KRYO-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2
 ; KRYO-NEXT:    [[TMP5:%.*]] = zext <8 x i16> [[TMP4]] to <8 x i32>
 ; KRYO-NEXT:    [[TMP6:%.*]] = sub nsw <8 x i32> [[TMP2]], [[TMP5]]
 ; KRYO-NEXT:    [[TMP7:%.*]] = extractelement <8 x i32> [[TMP6]], i64 0
 ; KRYO-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
-; KRYO-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[G:%.*]], i64 [[TMP8]]
-; KRYO-NEXT:    [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX]], align 2
+; KRYO-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[TMP8]]
+; KRYO-NEXT:    [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
 ; KRYO-NEXT:    [[CONV3:%.*]] = zext i16 [[TMP9]] to i32
 ; KRYO-NEXT:    [[ADD:%.*]] = add nsw i32 [[SUM_0102]], [[CONV3]]
 ; KRYO-NEXT:    [[TMP10:%.*]] = extractelement <8 x i32> [[TMP6]], i64 1
 ; KRYO-NEXT:    [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
-; KRYO-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP11]]
-; KRYO-NEXT:    [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
+; KRYO-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP11]]
+; KRYO-NEXT:    [[TMP12:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
 ; KRYO-NEXT:    [[CONV11:%.*]] = zext i16 [[TMP12]] to i32
 ; KRYO-NEXT:    [[ADD12:%.*]] = add nsw i32 [[ADD]], [[CONV11]]
 ; KRYO-NEXT:    [[TMP13:%.*]] = extractelement <8 x i32> [[TMP6]], i64 2
 ; KRYO-NEXT:    [[TMP14:%.*]] = sext i32 [[TMP13]] to i64
-; KRYO-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP14]]
-; KRYO-NEXT:    [[TMP15:%.*]] = load i16, i16* [[ARRAYIDX19]], align 2
+; KRYO-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP14]]
+; KRYO-NEXT:    [[TMP15:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2
 ; KRYO-NEXT:    [[CONV20:%.*]] = zext i16 [[TMP15]] to i32
 ; KRYO-NEXT:    [[ADD21:%.*]] = add nsw i32 [[ADD12]], [[CONV20]]
 ; KRYO-NEXT:    [[TMP16:%.*]] = extractelement <8 x i32> [[TMP6]], i64 3
 ; KRYO-NEXT:    [[TMP17:%.*]] = sext i32 [[TMP16]] to i64
-; KRYO-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP17]]
-; KRYO-NEXT:    [[TMP18:%.*]] = load i16, i16* [[ARRAYIDX28]], align 2
+; KRYO-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP17]]
+; KRYO-NEXT:    [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX28]], align 2
 ; KRYO-NEXT:    [[CONV29:%.*]] = zext i16 [[TMP18]] to i32
 ; KRYO-NEXT:    [[ADD30:%.*]] = add nsw i32 [[ADD21]], [[CONV29]]
 ; KRYO-NEXT:    [[TMP19:%.*]] = extractelement <8 x i32> [[TMP6]], i64 4
 ; KRYO-NEXT:    [[TMP20:%.*]] = sext i32 [[TMP19]] to i64
-; KRYO-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP20]]
-; KRYO-NEXT:    [[TMP21:%.*]] = load i16, i16* [[ARRAYIDX37]], align 2
+; KRYO-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP20]]
+; KRYO-NEXT:    [[TMP21:%.*]] = load i16, ptr [[ARRAYIDX37]], align 2
 ; KRYO-NEXT:    [[CONV38:%.*]] = zext i16 [[TMP21]] to i32
 ; KRYO-NEXT:    [[ADD39:%.*]] = add nsw i32 [[ADD30]], [[CONV38]]
 ; KRYO-NEXT:    [[TMP22:%.*]] = extractelement <8 x i32> [[TMP6]], i64 5
 ; KRYO-NEXT:    [[TMP23:%.*]] = sext i32 [[TMP22]] to i64
-; KRYO-NEXT:    [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP23]]
-; KRYO-NEXT:    [[TMP24:%.*]] = load i16, i16* [[ARRAYIDX46]], align 2
+; KRYO-NEXT:    [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP23]]
+; KRYO-NEXT:    [[TMP24:%.*]] = load i16, ptr [[ARRAYIDX46]], align 2
 ; KRYO-NEXT:    [[CONV47:%.*]] = zext i16 [[TMP24]] to i32
 ; KRYO-NEXT:    [[ADD48:%.*]] = add nsw i32 [[ADD39]], [[CONV47]]
 ; KRYO-NEXT:    [[TMP25:%.*]] = extractelement <8 x i32> [[TMP6]], i64 6
 ; KRYO-NEXT:    [[TMP26:%.*]] = sext i32 [[TMP25]] to i64
-; KRYO-NEXT:    [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP26]]
-; KRYO-NEXT:    [[TMP27:%.*]] = load i16, i16* [[ARRAYIDX55]], align 2
+; KRYO-NEXT:    [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP26]]
+; KRYO-NEXT:    [[TMP27:%.*]] = load i16, ptr [[ARRAYIDX55]], align 2
 ; KRYO-NEXT:    [[CONV56:%.*]] = zext i16 [[TMP27]] to i32
 ; KRYO-NEXT:    [[ADD57:%.*]] = add nsw i32 [[ADD48]], [[CONV56]]
 ; KRYO-NEXT:    [[TMP28:%.*]] = extractelement <8 x i32> [[TMP6]], i64 7
 ; KRYO-NEXT:    [[TMP29:%.*]] = sext i32 [[TMP28]] to i64
-; KRYO-NEXT:    [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP29]]
-; KRYO-NEXT:    [[TMP30:%.*]] = load i16, i16* [[ARRAYIDX64]], align 2
+; KRYO-NEXT:    [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP29]]
+; KRYO-NEXT:    [[TMP30:%.*]] = load i16, ptr [[ARRAYIDX64]], align 2
 ; KRYO-NEXT:    [[CONV65:%.*]] = zext i16 [[TMP30]] to i32
 ; KRYO-NEXT:    [[ADD66]] = add nsw i32 [[ADD57]], [[CONV65]]
 ; KRYO-NEXT:    [[INC]] = add nuw nsw i32 [[I_0103]], 1
@@ -188,92 +184,92 @@ for.cond.cleanup:
 for.body:
   %i.0103 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
   %sum.0102 = phi i32 [ %add66, %for.body ], [ 0, %for.body.preheader ]
-  %a.addr.0101 = phi i16* [ %incdec.ptr58, %for.body ], [ %a, %for.body.preheader ]
-  %incdec.ptr = getelementptr inbounds i16, i16* %a.addr.0101, i64 1
-  %0 = load i16, i16* %a.addr.0101, align 2
+  %a.addr.0101 = phi ptr [ %incdec.ptr58, %for.body ], [ %a, %for.body.preheader ]
+  %incdec.ptr = getelementptr inbounds i16, ptr %a.addr.0101, i64 1
+  %0 = load i16, ptr %a.addr.0101, align 2
   %conv = zext i16 %0 to i32
-  %incdec.ptr1 = getelementptr inbounds i16, i16* %b, i64 1
-  %1 = load i16, i16* %b, align 2
+  %incdec.ptr1 = getelementptr inbounds i16, ptr %b, i64 1
+  %1 = load i16, ptr %b, align 2
   %conv2 = zext i16 %1 to i32
   %sub = sub nsw i32 %conv, %conv2
-  %arrayidx = getelementptr inbounds i16, i16* %g, i32 %sub
-  %2 = load i16, i16* %arrayidx, align 2
+  %arrayidx = getelementptr inbounds i16, ptr %g, i32 %sub
+  %2 = load i16, ptr %arrayidx, align 2
   %conv3 = zext i16 %2 to i32
   %add = add nsw i32 %conv3, %sum.0102
-  %incdec.ptr4 = getelementptr inbounds i16, i16* %a.addr.0101, i64 2
-  %3 = load i16, i16* %incdec.ptr, align 2
+  %incdec.ptr4 = getelementptr inbounds i16, ptr %a.addr.0101, i64 2
+  %3 = load i16, ptr %incdec.ptr, align 2
   %conv5 = zext i16 %3 to i32
-  %incdec.ptr6 = getelementptr inbounds i16, i16* %b, i64 2
-  %4 = load i16, i16* %incdec.ptr1, align 2
+  %incdec.ptr6 = getelementptr inbounds i16, ptr %b, i64 2
+  %4 = load i16, ptr %incdec.ptr1, align 2
   %conv7 = zext i16 %4 to i32
   %sub8 = sub nsw i32 %conv5, %conv7
-  %arrayidx10 = getelementptr inbounds i16, i16* %g, i32 %sub8
-  %5 = load i16, i16* %arrayidx10, align 2
+  %arrayidx10 = getelementptr inbounds i16, ptr %g, i32 %sub8
+  %5 = load i16, ptr %arrayidx10, align 2
   %conv11 = zext i16 %5 to i32
   %add12 = add nsw i32 %add, %conv11
-  %incdec.ptr13 = getelementptr inbounds i16, i16* %a.addr.0101, i64 3
-  %6 = load i16, i16* %incdec.ptr4, align 2
+  %incdec.ptr13 = getelementptr inbounds i16, ptr %a.addr.0101, i64 3
+  %6 = load i16, ptr %incdec.ptr4, align 2
   %conv14 = zext i16 %6 to i32
-  %incdec.ptr15 = getelementptr inbounds i16, i16* %b, i64 3
-  %7 = load i16, i16* %incdec.ptr6, align 2
+  %incdec.ptr15 = getelementptr inbounds i16, ptr %b, i64 3
+  %7 = load i16, ptr %incdec.ptr6, align 2
   %conv16 = zext i16 %7 to i32
   %sub17 = sub nsw i32 %conv14, %conv16
-  %arrayidx19 = getelementptr inbounds i16, i16* %g, i32 %sub17
-  %8 = load i16, i16* %arrayidx19, align 2
+  %arrayidx19 = getelementptr inbounds i16, ptr %g, i32 %sub17
+  %8 = load i16, ptr %arrayidx19, align 2
   %conv20 = zext i16 %8 to i32
   %add21 = add nsw i32 %add12, %conv20
-  %incdec.ptr22 = getelementptr inbounds i16, i16* %a.addr.0101, i64 4
-  %9 = load i16, i16* %incdec.ptr13, align 2
+  %incdec.ptr22 = getelementptr inbounds i16, ptr %a.addr.0101, i64 4
+  %9 = load i16, ptr %incdec.ptr13, align 2
   %conv23 = zext i16 %9 to i32
-  %incdec.ptr24 = getelementptr inbounds i16, i16* %b, i64 4
-  %10 = load i16, i16* %incdec.ptr15, align 2
+  %incdec.ptr24 = getelementptr inbounds i16, ptr %b, i64 4
+  %10 = load i16, ptr %incdec.ptr15, align 2
   %conv25 = zext i16 %10 to i32
   %sub26 = sub nsw i32 %conv23, %conv25
-  %arrayidx28 = getelementptr inbounds i16, i16* %g, i32 %sub26
-  %11 = load i16, i16* %arrayidx28, align 2
+  %arrayidx28 = getelementptr inbounds i16, ptr %g, i32 %sub26
+  %11 = load i16, ptr %arrayidx28, align 2
   %conv29 = zext i16 %11 to i32
   %add30 = add nsw i32 %add21, %conv29
-  %incdec.ptr31 = getelementptr inbounds i16, i16* %a.addr.0101, i64 5
-  %12 = load i16, i16* %incdec.ptr22, align 2
+  %incdec.ptr31 = getelementptr inbounds i16, ptr %a.addr.0101, i64 5
+  %12 = load i16, ptr %incdec.ptr22, align 2
   %conv32 = zext i16 %12 to i32
-  %incdec.ptr33 = getelementptr inbounds i16, i16* %b, i64 5
-  %13 = load i16, i16* %incdec.ptr24, align 2
+  %incdec.ptr33 = getelementptr inbounds i16, ptr %b, i64 5
+  %13 = load i16, ptr %incdec.ptr24, align 2
   %conv34 = zext i16 %13 to i32
   %sub35 = sub nsw i32 %conv32, %conv34
-  %arrayidx37 = getelementptr inbounds i16, i16* %g, i32 %sub35
-  %14 = load i16, i16* %arrayidx37, align 2
+  %arrayidx37 = getelementptr inbounds i16, ptr %g, i32 %sub35
+  %14 = load i16, ptr %arrayidx37, align 2
   %conv38 = zext i16 %14 to i32
   %add39 = add nsw i32 %add30, %conv38
-  %incdec.ptr40 = getelementptr inbounds i16, i16* %a.addr.0101, i64 6
-  %15 = load i16, i16* %incdec.ptr31, align 2
+  %incdec.ptr40 = getelementptr inbounds i16, ptr %a.addr.0101, i64 6
+  %15 = load i16, ptr %incdec.ptr31, align 2
   %conv41 = zext i16 %15 to i32
-  %incdec.ptr42 = getelementptr inbounds i16, i16* %b, i64 6
-  %16 = load i16, i16* %incdec.ptr33, align 2
+  %incdec.ptr42 = getelementptr inbounds i16, ptr %b, i64 6
+  %16 = load i16, ptr %incdec.ptr33, align 2
   %conv43 = zext i16 %16 to i32
   %sub44 = sub nsw i32 %conv41, %conv43
-  %arrayidx46 = getelementptr inbounds i16, i16* %g, i32 %sub44
-  %17 = load i16, i16* %arrayidx46, align 2
+  %arrayidx46 = getelementptr inbounds i16, ptr %g, i32 %sub44
+  %17 = load i16, ptr %arrayidx46, align 2
   %conv47 = zext i16 %17 to i32
   %add48 = add nsw i32 %add39, %conv47
-  %incdec.ptr49 = getelementptr inbounds i16, i16* %a.addr.0101, i64 7
-  %18 = load i16, i16* %incdec.ptr40, align 2
+  %incdec.ptr49 = getelementptr inbounds i16, ptr %a.addr.0101, i64 7
+  %18 = load i16, ptr %incdec.ptr40, align 2
   %conv50 = zext i16 %18 to i32
-  %incdec.ptr51 = getelementptr inbounds i16, i16* %b, i64 7
-  %19 = load i16, i16* %incdec.ptr42, align 2
+  %incdec.ptr51 = getelementptr inbounds i16, ptr %b, i64 7
+  %19 = load i16, ptr %incdec.ptr42, align 2
   %conv52 = zext i16 %19 to i32
   %sub53 = sub nsw i32 %conv50, %conv52
-  %arrayidx55 = getelementptr inbounds i16, i16* %g, i32 %sub53
-  %20 = load i16, i16* %arrayidx55, align 2
+  %arrayidx55 = getelementptr inbounds i16, ptr %g, i32 %sub53
+  %20 = load i16, ptr %arrayidx55, align 2
   %conv56 = zext i16 %20 to i32
   %add57 = add nsw i32 %add48, %conv56
-  %incdec.ptr58 = getelementptr inbounds i16, i16* %a.addr.0101, i64 8
-  %21 = load i16, i16* %incdec.ptr49, align 2
+  %incdec.ptr58 = getelementptr inbounds i16, ptr %a.addr.0101, i64 8
+  %21 = load i16, ptr %incdec.ptr49, align 2
   %conv59 = zext i16 %21 to i32
-  %22 = load i16, i16* %incdec.ptr51, align 2
+  %22 = load i16, ptr %incdec.ptr51, align 2
   %conv61 = zext i16 %22 to i32
   %sub62 = sub nsw i32 %conv59, %conv61
-  %arrayidx64 = getelementptr inbounds i16, i16* %g, i32 %sub62
-  %23 = load i16, i16* %arrayidx64, align 2
+  %arrayidx64 = getelementptr inbounds i16, ptr %g, i32 %sub62
+  %23 = load i16, ptr %arrayidx64, align 2
   %conv65 = zext i16 %23 to i32
   %add66 = add nsw i32 %add57, %conv65
   %inc = add nuw nsw i32 %i.0103, 1
@@ -281,7 +277,7 @@ for.body:
   br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
 }
 
-define i32 @gather_reduce_8x16_i64(i16* nocapture readonly %a, i16* nocapture readonly %b, i16* nocapture readonly %g, i32 %n) {
+define i32 @gather_reduce_8x16_i64(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture readonly %g, i32 %n) {
 ; GENERIC-LABEL: @gather_reduce_8x16_i64(
 ; GENERIC-NEXT:  entry:
 ; GENERIC-NEXT:    [[CMP_99:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -296,61 +292,59 @@ define i32 @gather_reduce_8x16_i64(i16* nocapture readonly %a, i16* nocapture re
 ; GENERIC:       for.body:
 ; GENERIC-NEXT:    [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
 ; GENERIC-NEXT:    [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
-; GENERIC-NEXT:    [[A_ADDR_0101:%.*]] = phi i16* [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
-; GENERIC-NEXT:    [[INCDEC_PTR58]] = getelementptr inbounds i16, i16* [[A_ADDR_0101]], i64 8
-; GENERIC-NEXT:    [[TMP0:%.*]] = bitcast i16* [[A_ADDR_0101]] to <8 x i16>*
-; GENERIC-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
+; GENERIC-NEXT:    [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
+; GENERIC-NEXT:    [[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8
+; GENERIC-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2
 ; GENERIC-NEXT:    [[TMP2:%.*]] = zext <8 x i16> [[TMP1]] to <8 x i32>
-; GENERIC-NEXT:    [[TMP3:%.*]] = bitcast i16* [[B:%.*]] to <8 x i16>*
-; GENERIC-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 2
+; GENERIC-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2
 ; GENERIC-NEXT:    [[TMP5:%.*]] = zext <8 x i16> [[TMP4]] to <8 x i32>
 ; GENERIC-NEXT:    [[TMP6:%.*]] = sub nsw <8 x i32> [[TMP2]], [[TMP5]]
 ; GENERIC-NEXT:    [[TMP7:%.*]] = extractelement <8 x i32> [[TMP6]], i64 0
 ; GENERIC-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
-; GENERIC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[G:%.*]], i64 [[TMP8]]
-; GENERIC-NEXT:    [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX]], align 2
+; GENERIC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[TMP8]]
+; GENERIC-NEXT:    [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
 ; GENERIC-NEXT:    [[CONV3:%.*]] = zext i16 [[TMP9]] to i32
 ; GENERIC-NEXT:    [[ADD:%.*]] = add nsw i32 [[SUM_0102]], [[CONV3]]
 ; GENERIC-NEXT:    [[TMP10:%.*]] = extractelement <8 x i32> [[TMP6]], i64 1
 ; GENERIC-NEXT:    [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
-; GENERIC-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP11]]
-; GENERIC-NEXT:    [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
+; GENERIC-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP11]]
+; GENERIC-NEXT:    [[TMP12:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
 ; GENERIC-NEXT:    [[CONV11:%.*]] = zext i16 [[TMP12]] to i32
 ; GENERIC-NEXT:    [[ADD12:%.*]] = add nsw i32 [[ADD]], [[CONV11]]
 ; GENERIC-NEXT:    [[TMP13:%.*]] = extractelement <8 x i32> [[TMP6]], i64 2
 ; GENERIC-NEXT:    [[TMP14:%.*]] = sext i32 [[TMP13]] to i64
-; GENERIC-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP14]]
-; GENERIC-NEXT:    [[TMP15:%.*]] = load i16, i16* [[ARRAYIDX19]], align 2
+; GENERIC-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP14]]
+; GENERIC-NEXT:    [[TMP15:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2
 ; GENERIC-NEXT:    [[CONV20:%.*]] = zext i16 [[TMP15]] to i32
 ; GENERIC-NEXT:    [[ADD21:%.*]] = add nsw i32 [[ADD12]], [[CONV20]]
 ; GENERIC-NEXT:    [[TMP16:%.*]] = extractelement <8 x i32> [[TMP6]], i64 3
 ; GENERIC-NEXT:    [[TMP17:%.*]] = sext i32 [[TMP16]] to i64
-; GENERIC-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP17]]
-; GENERIC-NEXT:    [[TMP18:%.*]] = load i16, i16* [[ARRAYIDX28]], align 2
+; GENERIC-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP17]]
+; GENERIC-NEXT:    [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX28]], align 2
 ; GENERIC-NEXT:    [[CONV29:%.*]] = zext i16 [[TMP18]] to i32
 ; GENERIC-NEXT:    [[ADD30:%.*]] = add nsw i32 [[ADD21]], [[CONV29]]
 ; GENERIC-NEXT:    [[TMP19:%.*]] = extractelement <8 x i32> [[TMP6]], i64 4
 ; GENERIC-NEXT:    [[TMP20:%.*]] = sext i32 [[TMP19]] to i64
-; GENERIC-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP20]]
-; GENERIC-NEXT:    [[TMP21:%.*]] = load i16, i16* [[ARRAYIDX37]], align 2
+; GENERIC-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP20]]
+; GENERIC-NEXT:    [[TMP21:%.*]] = load i16, ptr [[ARRAYIDX37]], align 2
 ; GENERIC-NEXT:    [[CONV38:%.*]] = zext i16 [[TMP21]] to i32
 ; GENERIC-NEXT:    [[ADD39:%.*]] = add nsw i32 [[ADD30]], [[CONV38]]
 ; GENERIC-NEXT:    [[TMP22:%.*]] = extractelement <8 x i32> [[TMP6]], i64 5
 ; GENERIC-NEXT:    [[TMP23:%.*]] = sext i32 [[TMP22]] to i64
-; GENERIC-NEXT:    [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP23]]
-; GENERIC-NEXT:    [[TMP24:%.*]] = load i16, i16* [[ARRAYIDX46]], align 2
+; GENERIC-NEXT:    [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP23]]
+; GENERIC-NEXT:    [[TMP24:%.*]] = load i16, ptr [[ARRAYIDX46]], align 2
 ; GENERIC-NEXT:    [[CONV47:%.*]] = zext i16 [[TMP24]] to i32
 ; GENERIC-NEXT:    [[ADD48:%.*]] = add nsw i32 [[ADD39]], [[CONV47]]
 ; GENERIC-NEXT:    [[TMP25:%.*]] = extractelement <8 x i32> [[TMP6]], i64 6
 ; GENERIC-NEXT:    [[TMP26:%.*]] = sext i32 [[TMP25]] to i64
-; GENERIC-NEXT:    [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP26]]
-; GENERIC-NEXT:    [[TMP27:%.*]] = load i16, i16* [[ARRAYIDX55]], align 2
+; GENERIC-NEXT:    [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP26]]
+; GENERIC-NEXT:    [[TMP27:%.*]] = load i16, ptr [[ARRAYIDX55]], align 2
 ; GENERIC-NEXT:    [[CONV56:%.*]] = zext i16 [[TMP27]] to i32
 ; GENERIC-NEXT:    [[ADD57:%.*]] = add nsw i32 [[ADD48]], [[CONV56]]
 ; GENERIC-NEXT:    [[TMP28:%.*]] = extractelement <8 x i32> [[TMP6]], i64 7
 ; GENERIC-NEXT:    [[TMP29:%.*]] = sext i32 [[TMP28]] to i64
-; GENERIC-NEXT:    [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP29]]
-; GENERIC-NEXT:    [[TMP30:%.*]] = load i16, i16* [[ARRAYIDX64]], align 2
+; GENERIC-NEXT:    [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP29]]
+; GENERIC-NEXT:    [[TMP30:%.*]] = load i16, ptr [[ARRAYIDX64]], align 2
 ; GENERIC-NEXT:    [[CONV65:%.*]] = zext i16 [[TMP30]] to i32
 ; GENERIC-NEXT:    [[ADD66]] = add nsw i32 [[ADD57]], [[CONV65]]
 ; GENERIC-NEXT:    [[INC]] = add nuw nsw i32 [[I_0103]], 1
@@ -371,61 +365,59 @@ define i32 @gather_reduce_8x16_i64(i16* nocapture readonly %a, i16* nocapture re
 ; KRYO:       for.body:
 ; KRYO-NEXT:    [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
 ; KRYO-NEXT:    [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
-; KRYO-NEXT:    [[A_ADDR_0101:%.*]] = phi i16* [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
-; KRYO-NEXT:    [[INCDEC_PTR58]] = getelementptr inbounds i16, i16* [[A_ADDR_0101]], i64 8
-; KRYO-NEXT:    [[TMP0:%.*]] = bitcast i16* [[A_ADDR_0101]] to <8 x i16>*
-; KRYO-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
+; KRYO-NEXT:    [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
+; KRYO-NEXT:    [[INCDEC_PTR58]] = getelementptr inbounds i16, ptr [[A_ADDR_0101]], i64 8
+; KRYO-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2
 ; KRYO-NEXT:    [[TMP2:%.*]] = zext <8 x i16> [[TMP1]] to <8 x i32>
-; KRYO-NEXT:    [[TMP3:%.*]] = bitcast i16* [[B:%.*]] to <8 x i16>*
-; KRYO-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 2
+; KRYO-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2
 ; KRYO-NEXT:    [[TMP5:%.*]] = zext <8 x i16> [[TMP4]] to <8 x i32>
 ; KRYO-NEXT:    [[TMP6:%.*]] = sub nsw <8 x i32> [[TMP2]], [[TMP5]]
 ; KRYO-NEXT:    [[TMP7:%.*]] = extractelement <8 x i32> [[TMP6]], i64 0
 ; KRYO-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
-; KRYO-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[G:%.*]], i64 [[TMP8]]
-; KRYO-NEXT:    [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX]], align 2
+; KRYO-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[TMP8]]
+; KRYO-NEXT:    [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
 ; KRYO-NEXT:    [[CONV3:%.*]] = zext i16 [[TMP9]] to i32
 ; KRYO-NEXT:    [[ADD:%.*]] = add nsw i32 [[SUM_0102]], [[CONV3]]
 ; KRYO-NEXT:    [[TMP10:%.*]] = extractelement <8 x i32> [[TMP6]], i64 1
 ; KRYO-NEXT:    [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
-; KRYO-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP11]]
-; KRYO-NEXT:    [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
+; KRYO-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP11]]
+; KRYO-NEXT:    [[TMP12:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
 ; KRYO-NEXT:    [[CONV11:%.*]] = zext i16 [[TMP12]] to i32
 ; KRYO-NEXT:    [[ADD12:%.*]] = add nsw i32 [[ADD]], [[CONV11]]
 ; KRYO-NEXT:    [[TMP13:%.*]] = extractelement <8 x i32> [[TMP6]], i64 2
 ; KRYO-NEXT:    [[TMP14:%.*]] = sext i32 [[TMP13]] to i64
-; KRYO-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP14]]
-; KRYO-NEXT:    [[TMP15:%.*]] = load i16, i16* [[ARRAYIDX19]], align 2
+; KRYO-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP14]]
+; KRYO-NEXT:    [[TMP15:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2
 ; KRYO-NEXT:    [[CONV20:%.*]] = zext i16 [[TMP15]] to i32
 ; KRYO-NEXT:    [[ADD21:%.*]] = add nsw i32 [[ADD12]], [[CONV20]]
 ; KRYO-NEXT:    [[TMP16:%.*]] = extractelement <8 x i32> [[TMP6]], i64 3
 ; KRYO-NEXT:    [[TMP17:%.*]] = sext i32 [[TMP16]] to i64
-; KRYO-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP17]]
-; KRYO-NEXT:    [[TMP18:%.*]] = load i16, i16* [[ARRAYIDX28]], align 2
+; KRYO-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP17]]
+; KRYO-NEXT:    [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX28]], align 2
 ; KRYO-NEXT:    [[CONV29:%.*]] = zext i16 [[TMP18]] to i32
 ; KRYO-NEXT:    [[ADD30:%.*]] = add nsw i32 [[ADD21]], [[CONV29]]
 ; KRYO-NEXT:    [[TMP19:%.*]] = extractelement <8 x i32> [[TMP6]], i64 4
 ; KRYO-NEXT:    [[TMP20:%.*]] = sext i32 [[TMP19]] to i64
-; KRYO-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP20]]
-; KRYO-NEXT:    [[TMP21:%.*]] = load i16, i16* [[ARRAYIDX37]], align 2
+; KRYO-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP20]]
+; KRYO-NEXT:    [[TMP21:%.*]] = load i16, ptr [[ARRAYIDX37]], align 2
 ; KRYO-NEXT:    [[CONV38:%.*]] = zext i16 [[TMP21]] to i32
 ; KRYO-NEXT:    [[ADD39:%.*]] = add nsw i32 [[ADD30]], [[CONV38]]
 ; KRYO-NEXT:    [[TMP22:%.*]] = extractelement <8 x i32> [[TMP6]], i64 5
 ; KRYO-NEXT:    [[TMP23:%.*]] = sext i32 [[TMP22]] to i64
-; KRYO-NEXT:    [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP23]]
-; KRYO-NEXT:    [[TMP24:%.*]] = load i16, i16* [[ARRAYIDX46]], align 2
+; KRYO-NEXT:    [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP23]]
+; KRYO-NEXT:    [[TMP24:%.*]] = load i16, ptr [[ARRAYIDX46]], align 2
 ; KRYO-NEXT:    [[CONV47:%.*]] = zext i16 [[TMP24]] to i32
 ; KRYO-NEXT:    [[ADD48:%.*]] = add nsw i32 [[ADD39]], [[CONV47]]
 ; KRYO-NEXT:    [[TMP25:%.*]] = extractelement <8 x i32> [[TMP6]], i64 6
 ; KRYO-NEXT:    [[TMP26:%.*]] = sext i32 [[TMP25]] to i64
-; KRYO-NEXT:    [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP26]]
-; KRYO-NEXT:    [[TMP27:%.*]] = load i16, i16* [[ARRAYIDX55]], align 2
+; KRYO-NEXT:    [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP26]]
+; KRYO-NEXT:    [[TMP27:%.*]] = load i16, ptr [[ARRAYIDX55]], align 2
 ; KRYO-NEXT:    [[CONV56:%.*]] = zext i16 [[TMP27]] to i32
 ; KRYO-NEXT:    [[ADD57:%.*]] = add nsw i32 [[ADD48]], [[CONV56]]
 ; KRYO-NEXT:    [[TMP28:%.*]] = extractelement <8 x i32> [[TMP6]], i64 7
 ; KRYO-NEXT:    [[TMP29:%.*]] = sext i32 [[TMP28]] to i64
-; KRYO-NEXT:    [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, i16* [[G]], i64 [[TMP29]]
-; KRYO-NEXT:    [[TMP30:%.*]] = load i16, i16* [[ARRAYIDX64]], align 2
+; KRYO-NEXT:    [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP29]]
+; KRYO-NEXT:    [[TMP30:%.*]] = load i16, ptr [[ARRAYIDX64]], align 2
 ; KRYO-NEXT:    [[CONV65:%.*]] = zext i16 [[TMP30]] to i32
 ; KRYO-NEXT:    [[ADD66]] = add nsw i32 [[ADD57]], [[CONV65]]
 ; KRYO-NEXT:    [[INC]] = add nuw nsw i32 [[I_0103]], 1
@@ -449,92 +441,92 @@ for.cond.cleanup:
 for.body:
   %i.0103 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
   %sum.0102 = phi i32 [ %add66, %for.body ], [ 0, %for.body.preheader ]
-  %a.addr.0101 = phi i16* [ %incdec.ptr58, %for.body ], [ %a, %for.body.preheader ]
-  %incdec.ptr = getelementptr inbounds i16, i16* %a.addr.0101, i64 1
-  %0 = load i16, i16* %a.addr.0101, align 2
+  %a.addr.0101 = phi ptr [ %incdec.ptr58, %for.body ], [ %a, %for.body.preheader ]
+  %incdec.ptr = getelementptr inbounds i16, ptr %a.addr.0101, i64 1
+  %0 = load i16, ptr %a.addr.0101, align 2
   %conv = zext i16 %0 to i64
-  %incdec.ptr1 = getelementptr inbounds i16, i16* %b, i64 1
-  %1 = load i16, i16* %b, align 2
+  %incdec.ptr1 = getelementptr inbounds i16, ptr %b, i64 1
+  %1 = load i16, ptr %b, align 2
   %conv2 = zext i16 %1 to i64
   %sub = sub nsw i64 %conv, %conv2
-  %arrayidx = getelementptr inbounds i16, i16* %g, i64 %sub
-  %2 = load i16, i16* %arrayidx, align 2
+  %arrayidx = getelementptr inbounds i16, ptr %g, i64 %sub
+  %2 = load i16, ptr %arrayidx, align 2
   %conv3 = zext i16 %2 to i32
   %add = add nsw i32 %conv3, %sum.0102
-  %incdec.ptr4 = getelementptr inbounds i16, i16* %a.addr.0101, i64 2
-  %3 = load i16, i16* %incdec.ptr, align 2
+  %incdec.ptr4 = getelementptr inbounds i16, ptr %a.addr.0101, i64 2
+  %3 = load i16, ptr %incdec.ptr, align 2
   %conv5 = zext i16 %3 to i64
-  %incdec.ptr6 = getelementptr inbounds i16, i16* %b, i64 2
-  %4 = load i16, i16* %incdec.ptr1, align 2
+  %incdec.ptr6 = getelementptr inbounds i16, ptr %b, i64 2
+  %4 = load i16, ptr %incdec.ptr1, align 2
   %conv7 = zext i16 %4 to i64
   %sub8 = sub nsw i64 %conv5, %conv7
-  %arrayidx10 = getelementptr inbounds i16, i16* %g, i64 %sub8
-  %5 = load i16, i16* %arrayidx10, align 2
+  %arrayidx10 = getelementptr inbounds i16, ptr %g, i64 %sub8
+  %5 = load i16, ptr %arrayidx10, align 2
   %conv11 = zext i16 %5 to i32
   %add12 = add nsw i32 %add, %conv11
-  %incdec.ptr13 = getelementptr inbounds i16, i16* %a.addr.0101, i64 3
-  %6 = load i16, i16* %incdec.ptr4, align 2
+  %incdec.ptr13 = getelementptr inbounds i16, ptr %a.addr.0101, i64 3
+  %6 = load i16, ptr %incdec.ptr4, align 2
   %conv14 = zext i16 %6 to i64
-  %incdec.ptr15 = getelementptr inbounds i16, i16* %b, i64 3
-  %7 = load i16, i16* %incdec.ptr6, align 2
+  %incdec.ptr15 = getelementptr inbounds i16, ptr %b, i64 3
+  %7 = load i16, ptr %incdec.ptr6, align 2
   %conv16 = zext i16 %7 to i64
   %sub17 = sub nsw i64 %conv14, %conv16
-  %arrayidx19 = getelementptr inbounds i16, i16* %g, i64 %sub17
-  %8 = load i16, i16* %arrayidx19, align 2
+  %arrayidx19 = getelementptr inbounds i16, ptr %g, i64 %sub17
+  %8 = load i16, ptr %arrayidx19, align 2
   %conv20 = zext i16 %8 to i32
   %add21 = add nsw i32 %add12, %conv20
-  %incdec.ptr22 = getelementptr inbounds i16, i16* %a.addr.0101, i64 4
-  %9 = load i16, i16* %incdec.ptr13, align 2
+  %incdec.ptr22 = getelementptr inbounds i16, ptr %a.addr.0101, i64 4
+  %9 = load i16, ptr %incdec.ptr13, align 2
   %conv23 = zext i16 %9 to i64
-  %incdec.ptr24 = getelementptr inbounds i16, i16* %b, i64 4
-  %10 = load i16, i16* %incdec.ptr15, align 2
+  %incdec.ptr24 = getelementptr inbounds i16, ptr %b, i64 4
+  %10 = load i16, ptr %incdec.ptr15, align 2
   %conv25 = zext i16 %10 to i64
   %sub26 = sub nsw i64 %conv23, %conv25
-  %arrayidx28 = getelementptr inbounds i16, i16* %g, i64 %sub26
-  %11 = load i16, i16* %arrayidx28, align 2
+  %arrayidx28 = getelementptr inbounds i16, ptr %g, i64 %sub26
+  %11 = load i16, ptr %arrayidx28, align 2
   %conv29 = zext i16 %11 to i32
   %add30 = add nsw i32 %add21, %conv29
-  %incdec.ptr31 = getelementptr inbounds i16, i16* %a.addr.0101, i64 5
-  %12 = load i16, i16* %incdec.ptr22, align 2
+  %incdec.ptr31 = getelementptr inbounds i16, ptr %a.addr.0101, i64 5
+  %12 = load i16, ptr %incdec.ptr22, align 2
   %conv32 = zext i16 %12 to i64
-  %incdec.ptr33 = getelementptr inbounds i16, i16* %b, i64 5
-  %13 = load i16, i16* %incdec.ptr24, align 2
+  %incdec.ptr33 = getelementptr inbounds i16, ptr %b, i64 5
+  %13 = load i16, ptr %incdec.ptr24, align 2
   %conv34 = zext i16 %13 to i64
   %sub35 = sub nsw i64 %conv32, %conv34
-  %arrayidx37 = getelementptr inbounds i16, i16* %g, i64 %sub35
-  %14 = load i16, i16* %arrayidx37, align 2
+  %arrayidx37 = getelementptr inbounds i16, ptr %g, i64 %sub35
+  %14 = load i16, ptr %arrayidx37, align 2
   %conv38 = zext i16 %14 to i32
   %add39 = add nsw i32 %add30, %conv38
-  %incdec.ptr40 = getelementptr inbounds i16, i16* %a.addr.0101, i64 6
-  %15 = load i16, i16* %incdec.ptr31, align 2
+  %incdec.ptr40 = getelementptr inbounds i16, ptr %a.addr.0101, i64 6
+  %15 = load i16, ptr %incdec.ptr31, align 2
   %conv41 = zext i16 %15 to i64
-  %incdec.ptr42 = getelementptr inbounds i16, i16* %b, i64 6
-  %16 = load i16, i16* %incdec.ptr33, align 2
+  %incdec.ptr42 = getelementptr inbounds i16, ptr %b, i64 6
+  %16 = load i16, ptr %incdec.ptr33, align 2
   %conv43 = zext i16 %16 to i64
   %sub44 = sub nsw i64 %conv41, %conv43
-  %arrayidx46 = getelementptr inbounds i16, i16* %g, i64 %sub44
-  %17 = load i16, i16* %arrayidx46, align 2
+  %arrayidx46 = getelementptr inbounds i16, ptr %g, i64 %sub44
+  %17 = load i16, ptr %arrayidx46, align 2
   %conv47 = zext i16 %17 to i32
   %add48 = add nsw i32 %add39, %conv47
-  %incdec.ptr49 = getelementptr inbounds i16, i16* %a.addr.0101, i64 7
-  %18 = load i16, i16* %incdec.ptr40, align 2
+  %incdec.ptr49 = getelementptr inbounds i16, ptr %a.addr.0101, i64 7
+  %18 = load i16, ptr %incdec.ptr40, align 2
   %conv50 = zext i16 %18 to i64
-  %incdec.ptr51 = getelementptr inbounds i16, i16* %b, i64 7
-  %19 = load i16, i16* %incdec.ptr42, align 2
+  %incdec.ptr51 = getelementptr inbounds i16, ptr %b, i64 7
+  %19 = load i16, ptr %incdec.ptr42, align 2
   %conv52 = zext i16 %19 to i64
   %sub53 = sub nsw i64 %conv50, %conv52
-  %arrayidx55 = getelementptr inbounds i16, i16* %g, i64 %sub53
-  %20 = load i16, i16* %arrayidx55, align 2
+  %arrayidx55 = getelementptr inbounds i16, ptr %g, i64 %sub53
+  %20 = load i16, ptr %arrayidx55, align 2
   %conv56 = zext i16 %20 to i32
   %add57 = add nsw i32 %add48, %conv56
-  %incdec.ptr58 = getelementptr inbounds i16, i16* %a.addr.0101, i64 8
-  %21 = load i16, i16* %incdec.ptr49, align 2
+  %incdec.ptr58 = getelementptr inbounds i16, ptr %a.addr.0101, i64 8
+  %21 = load i16, ptr %incdec.ptr49, align 2
   %conv59 = zext i16 %21 to i64
-  %22 = load i16, i16* %incdec.ptr51, align 2
+  %22 = load i16, ptr %incdec.ptr51, align 2
   %conv61 = zext i16 %22 to i64
   %sub62 = sub nsw i64 %conv59, %conv61
-  %arrayidx64 = getelementptr inbounds i16, i16* %g, i64 %sub62
-  %23 = load i16, i16* %arrayidx64, align 2
+  %arrayidx64 = getelementptr inbounds i16, ptr %g, i64 %sub62
+  %23 = load i16, ptr %arrayidx64, align 2
   %conv65 = zext i16 %23 to i32
   %add66 = add nsw i32 %add57, %conv65
   %inc = add nuw nsw i32 %i.0103, 1

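The gather-reduce.ll updates above show the change that actually shortens the
CHECK lines: with typed pointers, loading <8 x i16> through an i16* needed a
bitcast to <8 x i16>* first; under opaque pointers that cast carries no
information and is deleted outright, which is why [[TMP0]] and [[TMP3]]
vanish while the later TMP names are left untouched. A minimal sketch:

define <8 x i16> @load_vec(ptr %a) {
  ; typed pointers:
  ;   %cast = bitcast i16* %a to <8 x i16>*
  ;   %v    = load <8 x i16>, <8 x i16>* %cast, align 2
  ; opaque pointers: the load's result type alone says what is read
  %v = load <8 x i16>, ptr %a, align 2
  ret <8 x i16> %v
}
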
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
index d18f5fac9f250..8987f34e561e6 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
@@ -11,7 +11,7 @@ target triple = "aarch64--linux-gnu"
 define void @PR28330(i32 %n) {
 ; DEFAULT-LABEL: @PR28330(
 ; DEFAULT-NEXT:  entry:
-; DEFAULT-NEXT:    [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <8 x i8>*), align 1
+; DEFAULT-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 1), align 1
 ; DEFAULT-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
 ; DEFAULT-NEXT:    br label [[FOR_BODY:%.*]]
 ; DEFAULT:       for.body:
@@ -23,7 +23,7 @@ define void @PR28330(i32 %n) {
 ;
 ; GATHER-LABEL: @PR28330(
 ; GATHER-NEXT:  entry:
-; GATHER-NEXT:    [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <8 x i8>*), align 1
+; GATHER-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 1), align 1
 ; GATHER-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
 ; GATHER-NEXT:    br label [[FOR_BODY:%.*]]
 ; GATHER:       for.body:
@@ -35,7 +35,7 @@ define void @PR28330(i32 %n) {
 ;
 ; MAX-COST-LABEL: @PR28330(
 ; MAX-COST-NEXT:  entry:
-; MAX-COST-NEXT:    [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <8 x i8>*), align 1
+; MAX-COST-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 1), align 1
 ; MAX-COST-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
 ; MAX-COST-NEXT:    br label [[FOR_BODY:%.*]]
 ; MAX-COST:       for.body:
@@ -46,21 +46,21 @@ define void @PR28330(i32 %n) {
 ; MAX-COST-NEXT:    br label [[FOR_BODY]]
 ;
 entry:
-  %p0 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1
+  %p0 = load i8, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 1), align 1
   %p1 = icmp eq i8 %p0, 0
-  %p2 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2
+  %p2 = load i8, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 2), align 2
   %p3 = icmp eq i8 %p2, 0
-  %p4 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1
+  %p4 = load i8, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 3), align 1
   %p5 = icmp eq i8 %p4, 0
-  %p6 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4
+  %p6 = load i8, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 4), align 4
   %p7 = icmp eq i8 %p6, 0
-  %p8 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1
+  %p8 = load i8, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 5), align 1
   %p9 = icmp eq i8 %p8, 0
-  %p10 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2
+  %p10 = load i8, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 6), align 2
   %p11 = icmp eq i8 %p10, 0
-  %p12 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1
+  %p12 = load i8, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 7), align 1
   %p13 = icmp eq i8 %p12, 0
-  %p14 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8
+  %p14 = load i8, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 8), align 8
   %p15 = icmp eq i8 %p14, 0
   br label %for.body
 
@@ -88,7 +88,7 @@ for.body:
 define void @PR32038(i32 %n) {
 ; DEFAULT-LABEL: @PR32038(
 ; DEFAULT-NEXT:  entry:
-; DEFAULT-NEXT:    [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <8 x i8>*), align 1
+; DEFAULT-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 1), align 1
 ; DEFAULT-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
 ; DEFAULT-NEXT:    br label [[FOR_BODY:%.*]]
 ; DEFAULT:       for.body:
@@ -100,7 +100,7 @@ define void @PR32038(i32 %n) {
 ;
 ; GATHER-LABEL: @PR32038(
 ; GATHER-NEXT:  entry:
-; GATHER-NEXT:    [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <8 x i8>*), align 1
+; GATHER-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 1), align 1
 ; GATHER-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
 ; GATHER-NEXT:    br label [[FOR_BODY:%.*]]
 ; GATHER:       for.body:
@@ -112,7 +112,7 @@ define void @PR32038(i32 %n) {
 ;
 ; MAX-COST-LABEL: @PR32038(
 ; MAX-COST-NEXT:  entry:
-; MAX-COST-NEXT:    [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <8 x i8>*), align 1
+; MAX-COST-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 1), align 1
 ; MAX-COST-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
 ; MAX-COST-NEXT:    br label [[FOR_BODY:%.*]]
 ; MAX-COST:       for.body:
@@ -123,21 +123,21 @@ define void @PR32038(i32 %n) {
 ; MAX-COST-NEXT:    br label [[FOR_BODY]]
 ;
 entry:
-  %p0 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1
+  %p0 = load i8, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 1), align 1
   %p1 = icmp eq i8 %p0, 0
-  %p2 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2
+  %p2 = load i8, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 2), align 2
   %p3 = icmp eq i8 %p2, 0
-  %p4 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1
+  %p4 = load i8, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 3), align 1
   %p5 = icmp eq i8 %p4, 0
-  %p6 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4
+  %p6 = load i8, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 4), align 4
   %p7 = icmp eq i8 %p6, 0
-  %p8 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1
+  %p8 = load i8, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 5), align 1
   %p9 = icmp eq i8 %p8, 0
-  %p10 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2
+  %p10 = load i8, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 6), align 2
   %p11 = icmp eq i8 %p10, 0
-  %p12 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1
+  %p12 = load i8, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 7), align 1
   %p13 = icmp eq i8 %p12, 0
-  %p14 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8
+  %p14 = load i8, ptr getelementptr inbounds ([80 x i8], ptr @a, i64 0, i64 8), align 8
   %p15 = icmp eq i8 %p14, 0
   br label %for.body
 

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
index f301bc340a75b..a567b6c71e898 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
@@ -44,7 +44,7 @@ target triple = "aarch64--linux-gnu"
 ; YAML-NEXT:   - String:          ' and with tree size '
 ; YAML-NEXT:   - TreeSize:        '3'
 
-define i32 @getelementptr_4x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %y, i32 %z) {
+define i32 @getelementptr_4x32(ptr nocapture readonly %g, i32 %n, i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @getelementptr_4x32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -68,24 +68,24 @@ define i32 @getelementptr_4x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %
 ; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i32> [[TMP4]], [[TMP0]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i64 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[G:%.*]], i64 [[TMP7]]
-; CHECK-NEXT:    [[T6:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[G:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[T6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[T6]], [[SUM_032]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i32> [[TMP5]], i64 1
 ; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP9]]
-; CHECK-NEXT:    [[T8:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP9]]
+; CHECK-NEXT:    [[T8:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    [[ADD6:%.*]] = add nsw i32 [[ADD1]], [[T8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <2 x i32> [[TMP4]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i32> [[TMP10]], i64 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
-; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP12]]
-; CHECK-NEXT:    [[T10:%.*]] = load i32, i32* [[ARRAYIDX10]], align 4
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP12]]
+; CHECK-NEXT:    [[T10:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4
 ; CHECK-NEXT:    [[ADD11:%.*]] = add nsw i32 [[ADD6]], [[T10]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i32> [[TMP10]], i64 1
 ; CHECK-NEXT:    [[TMP14:%.*]] = sext i32 [[TMP13]] to i64
-; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP14]]
-; CHECK-NEXT:    [[T12:%.*]] = load i32, i32* [[ARRAYIDX15]], align 4
+; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP14]]
+; CHECK-NEXT:    [[T12:%.*]] = load i32, ptr [[ARRAYIDX15]], align 4
 ; CHECK-NEXT:    [[ADD16]] = add nsw i32 [[ADD11]], [[T12]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[N]]
@@ -110,20 +110,20 @@ for.body:
   %sum.032 = phi i32 [ 0, %for.body.preheader ], [ %add16, %for.body ]
   %t4 = shl nsw i32 %indvars.iv, 1
   %t5 = add nsw i32 %t4, 0
-  %arrayidx = getelementptr inbounds i32, i32* %g, i32 %t5
-  %t6 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %g, i32 %t5
+  %t6 = load i32, ptr %arrayidx, align 4
   %add1 = add nsw i32 %t6, %sum.032
   %t7 = add nsw i32 %t4, %x
-  %arrayidx5 = getelementptr inbounds i32, i32* %g, i32 %t7
-  %t8 = load i32, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %g, i32 %t7
+  %t8 = load i32, ptr %arrayidx5, align 4
   %add6 = add nsw i32 %add1, %t8
   %t9 = add nsw i32 %t4, %y
-  %arrayidx10 = getelementptr inbounds i32, i32* %g, i32 %t9
-  %t10 = load i32, i32* %arrayidx10, align 4
+  %arrayidx10 = getelementptr inbounds i32, ptr %g, i32 %t9
+  %t10 = load i32, ptr %arrayidx10, align 4
   %add11 = add nsw i32 %add6, %t10
   %t11 = add nsw i32 %t4, %z
-  %arrayidx15 = getelementptr inbounds i32, i32* %g, i32 %t11
-  %t12 = load i32, i32* %arrayidx15, align 4
+  %arrayidx15 = getelementptr inbounds i32, ptr %g, i32 %t11
+  %t12 = load i32, ptr %arrayidx15, align 4
   %add16 = add nsw i32 %add11, %t12
   %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
   %exitcond = icmp eq i32 %indvars.iv.next, %n
@@ -141,7 +141,7 @@ for.body:
 ; YAML-NEXT:   - String:          ' and with tree size '
 ; YAML-NEXT:   - TreeSize:        '3'
 
-define i32 @getelementptr_2x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %y, i32 %z) {
+define i32 @getelementptr_2x32(ptr nocapture readonly %g, i32 %n, i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @getelementptr_2x32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -160,26 +160,26 @@ define i32 @getelementptr_2x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %
 ; CHECK-NEXT:    [[SUM_032:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[ADD16]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[T4:%.*]] = shl nuw nsw i32 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[T4]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[G:%.*]], i64 [[TMP2]]
-; CHECK-NEXT:    [[T6:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[G:%.*]], i64 [[TMP2]]
+; CHECK-NEXT:    [[T6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[T6]], [[SUM_032]]
 ; CHECK-NEXT:    [[T7:%.*]] = or i32 [[T4]], 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[T7]] to i64
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP3]]
-; CHECK-NEXT:    [[T8:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP3]]
+; CHECK-NEXT:    [[T8:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    [[ADD6:%.*]] = add nsw i32 [[ADD1]], [[T8]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[T4]], i64 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <2 x i32> [[TMP5]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i64 0
 ; CHECK-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
-; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP8]]
-; CHECK-NEXT:    [[T10:%.*]] = load i32, i32* [[ARRAYIDX10]], align 4
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP8]]
+; CHECK-NEXT:    [[T10:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4
 ; CHECK-NEXT:    [[ADD11:%.*]] = add nsw i32 [[ADD6]], [[T10]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i32> [[TMP6]], i64 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
-; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP10]]
-; CHECK-NEXT:    [[T12:%.*]] = load i32, i32* [[ARRAYIDX15]], align 4
+; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP10]]
+; CHECK-NEXT:    [[T12:%.*]] = load i32, ptr [[ARRAYIDX15]], align 4
 ; CHECK-NEXT:    [[ADD16]] = add nsw i32 [[ADD11]], [[T12]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[N]]
@@ -204,80 +204,77 @@ for.body:
   %sum.032 = phi i32 [ 0, %for.body.preheader ], [ %add16, %for.body ]
   %t4 = shl nsw i32 %indvars.iv, 1
   %t5 = add nsw i32 %t4, 0
-  %arrayidx = getelementptr inbounds i32, i32* %g, i32 %t5
-  %t6 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %g, i32 %t5
+  %t6 = load i32, ptr %arrayidx, align 4
   %add1 = add nsw i32 %t6, %sum.032
   %t7 = add nsw i32 %t4, 1
-  %arrayidx5 = getelementptr inbounds i32, i32* %g, i32 %t7
-  %t8 = load i32, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %g, i32 %t7
+  %t8 = load i32, ptr %arrayidx5, align 4
   %add6 = add nsw i32 %add1, %t8
   %t9 = add nsw i32 %t4, %y
-  %arrayidx10 = getelementptr inbounds i32, i32* %g, i32 %t9
-  %t10 = load i32, i32* %arrayidx10, align 4
+  %arrayidx10 = getelementptr inbounds i32, ptr %g, i32 %t9
+  %t10 = load i32, ptr %arrayidx10, align 4
   %add11 = add nsw i32 %add6, %t10
   %t11 = add nsw i32 %t4, %z
-  %arrayidx15 = getelementptr inbounds i32, i32* %g, i32 %t11
-  %t12 = load i32, i32* %arrayidx15, align 4
+  %arrayidx15 = getelementptr inbounds i32, ptr %g, i32 %t11
+  %t12 = load i32, ptr %arrayidx15, align 4
   %add16 = add nsw i32 %add11, %t12
   %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
   %exitcond = icmp eq i32 %indvars.iv.next, %n
   br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
 }
 
- at global = internal global { i32* } zeroinitializer, align 8
+ at global = internal global { ptr } zeroinitializer, align 8
 
 ; Make sure we vectorize to maximize the load width when loading i16 and
 ; extending it for compute operations.
-define void @test_i16_extend(i16* %p.1, i16* %p.2, i32 %idx.i32) {
+define void @test_i16_extend(ptr %p.1, ptr %p.2, i32 %idx.i32) {
 ; CHECK-LABEL: @test_i16_extend(
-; CHECK-NEXT:    [[P_0:%.*]] = load i32*, i32** getelementptr inbounds ({ i32* }, { i32* }* @global, i64 0, i32 0), align 8
+; CHECK-NEXT:    [[P_0:%.*]] = load ptr, ptr @global, align 8
 ; CHECK-NEXT:    [[IDX_0:%.*]] = zext i32 [[IDX_I32:%.*]] to i64
-; CHECK-NEXT:    [[T53:%.*]] = getelementptr inbounds i16, i16* [[P_1:%.*]], i64 [[IDX_0]]
-; CHECK-NEXT:    [[T56:%.*]] = getelementptr inbounds i16, i16* [[P_2:%.*]], i64 [[IDX_0]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[T53]] to <8 x i16>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
+; CHECK-NEXT:    [[T53:%.*]] = getelementptr inbounds i16, ptr [[P_1:%.*]], i64 [[IDX_0]]
+; CHECK-NEXT:    [[T56:%.*]] = getelementptr inbounds i16, ptr [[P_2:%.*]], i64 [[IDX_0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[T53]], align 2
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[T56]] to <8 x i16>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[TMP4]], align 2
+; CHECK-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr [[T56]], align 2
 ; CHECK-NEXT:    [[TMP6:%.*]] = zext <8 x i16> [[TMP5]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub nsw <8 x i32> [[TMP3]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <8 x i32> [[TMP7]], i64 0
 ; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-; CHECK-NEXT:    [[T60:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 [[TMP9]]
-; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[T60]], align 4
+; CHECK-NEXT:    [[T60:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP9]]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, ptr [[T60]], align 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <8 x i32> [[TMP7]], i64 1
 ; CHECK-NEXT:    [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
-; CHECK-NEXT:    [[T71:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 [[TMP11]]
-; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[T71]], align 4
+; CHECK-NEXT:    [[T71:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP11]]
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, ptr [[T71]], align 4
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <8 x i32> [[TMP7]], i64 2
 ; CHECK-NEXT:    [[TMP13:%.*]] = sext i32 [[TMP12]] to i64
-; CHECK-NEXT:    [[T82:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 [[TMP13]]
-; CHECK-NEXT:    [[L_3:%.*]] = load i32, i32* [[T82]], align 4
+; CHECK-NEXT:    [[T82:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP13]]
+; CHECK-NEXT:    [[L_3:%.*]] = load i32, ptr [[T82]], align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <8 x i32> [[TMP7]], i64 3
 ; CHECK-NEXT:    [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
-; CHECK-NEXT:    [[T93:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 [[TMP15]]
-; CHECK-NEXT:    [[L_4:%.*]] = load i32, i32* [[T93]], align 4
+; CHECK-NEXT:    [[T93:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP15]]
+; CHECK-NEXT:    [[L_4:%.*]] = load i32, ptr [[T93]], align 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i32> [[TMP7]], i64 4
 ; CHECK-NEXT:    [[TMP17:%.*]] = sext i32 [[TMP16]] to i64
-; CHECK-NEXT:    [[T104:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 [[TMP17]]
-; CHECK-NEXT:    [[L_5:%.*]] = load i32, i32* [[T104]], align 4
+; CHECK-NEXT:    [[T104:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP17]]
+; CHECK-NEXT:    [[L_5:%.*]] = load i32, ptr [[T104]], align 4
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <8 x i32> [[TMP7]], i64 5
 ; CHECK-NEXT:    [[TMP19:%.*]] = sext i32 [[TMP18]] to i64
-; CHECK-NEXT:    [[T115:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 [[TMP19]]
-; CHECK-NEXT:    [[L_6:%.*]] = load i32, i32* [[T115]], align 4
+; CHECK-NEXT:    [[T115:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP19]]
+; CHECK-NEXT:    [[L_6:%.*]] = load i32, ptr [[T115]], align 4
 ; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <8 x i32> [[TMP7]], i64 6
 ; CHECK-NEXT:    [[TMP21:%.*]] = sext i32 [[TMP20]] to i64
-; CHECK-NEXT:    [[T126:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 [[TMP21]]
-; CHECK-NEXT:    [[L_7:%.*]] = load i32, i32* [[T126]], align 4
+; CHECK-NEXT:    [[T126:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP21]]
+; CHECK-NEXT:    [[L_7:%.*]] = load i32, ptr [[T126]], align 4
 ; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <8 x i32> [[TMP7]], i64 7
 ; CHECK-NEXT:    [[TMP23:%.*]] = sext i32 [[TMP22]] to i64
-; CHECK-NEXT:    [[T137:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 [[TMP23]]
-; CHECK-NEXT:    [[L_8:%.*]] = load i32, i32* [[T137]], align 4
+; CHECK-NEXT:    [[T137:%.*]] = getelementptr inbounds i32, ptr [[P_0]], i64 [[TMP23]]
+; CHECK-NEXT:    [[L_8:%.*]] = load i32, ptr [[T137]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[L_1]], i32 [[L_2]], i32 [[L_3]], i32 [[L_4]], i32 [[L_5]], i32 [[L_6]], i32 [[L_7]], i32 [[L_8]])
 ; CHECK-NEXT:    ret void
 ;
-  %g = getelementptr inbounds { i32*}, { i32 *}* @global, i64 0, i32 0
-  %p.0 = load i32*, i32** %g, align 8
+  %p.0 = load ptr, ptr @global, align 8
 
   %idx.0 = zext i32 %idx.i32 to i64
   %idx.1 = add nsw i64 %idx.0, 1
@@ -288,93 +285,93 @@ define void @test_i16_extend(i16* %p.1, i16* %p.2, i32 %idx.i32) {
   %idx.6 = add nsw i64 %idx.0, 6
   %idx.7 = add nsw i64 %idx.0, 7
 
-  %t53 = getelementptr inbounds i16, i16* %p.1, i64 %idx.0
-  %op1.l = load i16, i16* %t53, align 2
+  %t53 = getelementptr inbounds i16, ptr %p.1, i64 %idx.0
+  %op1.l = load i16, ptr %t53, align 2
   %op1.ext = zext i16 %op1.l to i64
-  %t56 = getelementptr inbounds i16, i16* %p.2, i64 %idx.0
-  %op2.l = load i16, i16* %t56, align 2
+  %t56 = getelementptr inbounds i16, ptr %p.2, i64 %idx.0
+  %op2.l = load i16, ptr %t56, align 2
   %op2.ext = zext i16 %op2.l to i64
   %sub.1 = sub nsw i64 %op1.ext, %op2.ext
 
-  %t60 = getelementptr inbounds i32, i32* %p.0, i64 %sub.1
-  %l.1 = load i32, i32* %t60, align 4
+  %t60 = getelementptr inbounds i32, ptr %p.0, i64 %sub.1
+  %l.1 = load i32, ptr %t60, align 4
 
-  %t64 = getelementptr inbounds i16, i16* %p.1, i64 %idx.1
-  %t65 = load i16, i16* %t64, align 2
+  %t64 = getelementptr inbounds i16, ptr %p.1, i64 %idx.1
+  %t65 = load i16, ptr %t64, align 2
   %t66 = zext i16 %t65 to i64
-  %t67 = getelementptr inbounds i16, i16* %p.2, i64 %idx.1
-  %t68 = load i16, i16* %t67, align 2
+  %t67 = getelementptr inbounds i16, ptr %p.2, i64 %idx.1
+  %t68 = load i16, ptr %t67, align 2
   %t69 = zext i16 %t68 to i64
   %sub.2 = sub nsw i64 %t66, %t69
 
-  %t71 = getelementptr inbounds i32, i32* %p.0, i64 %sub.2
-  %l.2 = load i32, i32* %t71, align 4
+  %t71 = getelementptr inbounds i32, ptr %p.0, i64 %sub.2
+  %l.2 = load i32, ptr %t71, align 4
 
-  %t75 = getelementptr inbounds i16, i16* %p.1, i64 %idx.2
-  %t76 = load i16, i16* %t75, align 2
+  %t75 = getelementptr inbounds i16, ptr %p.1, i64 %idx.2
+  %t76 = load i16, ptr %t75, align 2
   %t77 = zext i16 %t76 to i64
-  %t78 = getelementptr inbounds i16, i16* %p.2, i64 %idx.2
-  %t79 = load i16, i16* %t78, align 2
+  %t78 = getelementptr inbounds i16, ptr %p.2, i64 %idx.2
+  %t79 = load i16, ptr %t78, align 2
   %t80 = zext i16 %t79 to i64
   %sub.3 = sub nsw i64 %t77, %t80
 
-  %t82 = getelementptr inbounds i32, i32* %p.0, i64 %sub.3
-  %l.3 = load i32, i32* %t82, align 4
+  %t82 = getelementptr inbounds i32, ptr %p.0, i64 %sub.3
+  %l.3 = load i32, ptr %t82, align 4
 
-  %t86 = getelementptr inbounds i16, i16* %p.1, i64 %idx.3
-  %t87 = load i16, i16* %t86, align 2
+  %t86 = getelementptr inbounds i16, ptr %p.1, i64 %idx.3
+  %t87 = load i16, ptr %t86, align 2
   %t88 = zext i16 %t87 to i64
-  %t89 = getelementptr inbounds i16, i16* %p.2, i64 %idx.3
-  %t90 = load i16, i16* %t89, align 2
+  %t89 = getelementptr inbounds i16, ptr %p.2, i64 %idx.3
+  %t90 = load i16, ptr %t89, align 2
   %t91 = zext i16 %t90 to i64
   %sub.4 = sub nsw i64 %t88, %t91
 
-  %t93 = getelementptr inbounds i32, i32* %p.0, i64 %sub.4
-  %l.4 = load i32, i32* %t93, align 4
+  %t93 = getelementptr inbounds i32, ptr %p.0, i64 %sub.4
+  %l.4 = load i32, ptr %t93, align 4
 
-  %t97 = getelementptr inbounds i16, i16* %p.1, i64 %idx.4
-  %t98 = load i16, i16* %t97, align 2
+  %t97 = getelementptr inbounds i16, ptr %p.1, i64 %idx.4
+  %t98 = load i16, ptr %t97, align 2
   %t99 = zext i16 %t98 to i64
-  %t100 = getelementptr inbounds i16, i16* %p.2, i64 %idx.4
-  %t101 = load i16, i16* %t100, align 2
+  %t100 = getelementptr inbounds i16, ptr %p.2, i64 %idx.4
+  %t101 = load i16, ptr %t100, align 2
   %t102 = zext i16 %t101 to i64
   %sub.5 = sub nsw i64 %t99, %t102
 
-  %t104 = getelementptr inbounds i32, i32* %p.0, i64 %sub.5
-  %l.5 = load i32, i32* %t104, align 4
+  %t104 = getelementptr inbounds i32, ptr %p.0, i64 %sub.5
+  %l.5 = load i32, ptr %t104, align 4
 
-  %t108 = getelementptr inbounds i16, i16* %p.1, i64 %idx.5
-  %t109 = load i16, i16* %t108, align 2
+  %t108 = getelementptr inbounds i16, ptr %p.1, i64 %idx.5
+  %t109 = load i16, ptr %t108, align 2
   %t110 = zext i16 %t109 to i64
-  %t111 = getelementptr inbounds i16, i16* %p.2, i64 %idx.5
-  %t112 = load i16, i16* %t111, align 2
+  %t111 = getelementptr inbounds i16, ptr %p.2, i64 %idx.5
+  %t112 = load i16, ptr %t111, align 2
   %t113 = zext i16 %t112 to i64
   %sub.6 = sub nsw i64 %t110, %t113
 
-  %t115 = getelementptr inbounds i32, i32* %p.0, i64 %sub.6
-  %l.6 = load i32, i32* %t115, align 4
+  %t115 = getelementptr inbounds i32, ptr %p.0, i64 %sub.6
+  %l.6 = load i32, ptr %t115, align 4
 
-  %t119 = getelementptr inbounds i16, i16* %p.1, i64 %idx.6
-  %t120 = load i16, i16* %t119, align 2
+  %t119 = getelementptr inbounds i16, ptr %p.1, i64 %idx.6
+  %t120 = load i16, ptr %t119, align 2
   %t121 = zext i16 %t120 to i64
-  %t122 = getelementptr inbounds i16, i16* %p.2, i64 %idx.6
-  %t123 = load i16, i16* %t122, align 2
+  %t122 = getelementptr inbounds i16, ptr %p.2, i64 %idx.6
+  %t123 = load i16, ptr %t122, align 2
   %t124 = zext i16 %t123 to i64
   %sub.7 = sub nsw i64 %t121, %t124
 
-  %t126 = getelementptr inbounds i32, i32* %p.0, i64 %sub.7
-  %l.7 = load i32, i32* %t126, align 4
+  %t126 = getelementptr inbounds i32, ptr %p.0, i64 %sub.7
+  %l.7 = load i32, ptr %t126, align 4
 
-  %t130 = getelementptr inbounds i16, i16* %p.1, i64 %idx.7
-  %t131 = load i16, i16* %t130, align 2
+  %t130 = getelementptr inbounds i16, ptr %p.1, i64 %idx.7
+  %t131 = load i16, ptr %t130, align 2
   %t132 = zext i16 %t131 to i64
-  %t133 = getelementptr inbounds i16, i16* %p.2, i64 %idx.7
-  %t134 = load i16, i16* %t133, align 2
+  %t133 = getelementptr inbounds i16, ptr %p.2, i64 %idx.7
+  %t134 = load i16, ptr %t133, align 2
   %t135 = zext i16 %t134 to i64
   %sub.8 = sub nsw i64 %t132, %t135
 
-  %t137 = getelementptr inbounds i32, i32* %p.0, i64 %sub.8
-  %l.8 = load i32, i32* %t137, align 4
+  %t137 = getelementptr inbounds i32, ptr %p.0, i64 %sub.8
+  %l.8 = load i32, ptr %t137, align 4
 
   call void @use(i32 %l.1, i32 %l.2, i32 %l.3, i32 %l.4, i32 %l.5, i32 %l.6, i32 %l.7, i32 %l.8)
   ret void

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
index 3f295f5aff90e..bc39cfb87bea1 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
@@ -19,7 +19,7 @@ target triple = "aarch64--linux"
 ; YAML-NEXT:   - String:          ' and with tree size '
 ; YAML-NEXT:   - TreeSize:        '8'
 
-define i32 @test_select(i32* noalias nocapture readonly %blk1, i32* noalias nocapture readonly %blk2, i32 %lx, i32 %h) {
+define i32 @test_select(ptr noalias nocapture readonly %blk1, ptr noalias nocapture readonly %blk2, i32 %lx, i32 %h) {
 ; CHECK-LABEL: @test_select(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP_22:%.*]] = icmp sgt i32 [[H:%.*]], 0
@@ -30,20 +30,18 @@ define i32 @test_select(i32* noalias nocapture readonly %blk1, i32* noalias noca
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[S_026:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[J_025:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[P2_024:%.*]] = phi i32* [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR29:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[P1_023:%.*]] = phi i32* [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P1_023]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[P2_024]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[P2_024:%.*]] = phi ptr [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR29:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[P1_023:%.*]] = phi ptr [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[P1_023]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[P2_024]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP4]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP4]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
 ; CHECK-NEXT:    [[OP_RDX]] = add i32 [[TMP8]], [[S_026]]
-; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i32, i32* [[P1_023]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[ADD_PTR29]] = getelementptr inbounds i32, i32* [[P2_024]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i32, ptr [[P1_023]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[ADD_PTR29]] = getelementptr inbounds i32, ptr [[P2_024]], i64 [[IDX_EXT]]
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[J_025]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[H]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
@@ -64,44 +62,44 @@ for.body.lr.ph:                                   ; preds = %entry
 for.body:                                         ; preds = %for.body, %for.body.lr.ph
   %s.026 = phi i32 [ 0, %for.body.lr.ph ], [ %add27, %for.body ]
   %j.025 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
-  %p2.024 = phi i32* [ %blk2, %for.body.lr.ph ], [ %add.ptr29, %for.body ]
-  %p1.023 = phi i32* [ %blk1, %for.body.lr.ph ], [ %add.ptr, %for.body ]
-  %0 = load i32, i32* %p1.023, align 4
-  %1 = load i32, i32* %p2.024, align 4
+  %p2.024 = phi ptr [ %blk2, %for.body.lr.ph ], [ %add.ptr29, %for.body ]
+  %p1.023 = phi ptr [ %blk1, %for.body.lr.ph ], [ %add.ptr, %for.body ]
+  %0 = load i32, ptr %p1.023, align 4
+  %1 = load i32, ptr %p2.024, align 4
   %sub = sub nsw i32 %0, %1
   %cmp2 = icmp slt i32 %sub, 0
   %sub3 = sub nsw i32 0, %sub
   %sub3.sub = select i1 %cmp2, i32 %sub3, i32 %sub
   %add = add nsw i32 %sub3.sub, %s.026
-  %arrayidx4 = getelementptr inbounds i32, i32* %p1.023, i64 1
-  %2 = load i32, i32* %arrayidx4, align 4
-  %arrayidx5 = getelementptr inbounds i32, i32* %p2.024, i64 1
-  %3 = load i32, i32* %arrayidx5, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %p1.023, i64 1
+  %2 = load i32, ptr %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %p2.024, i64 1
+  %3 = load i32, ptr %arrayidx5, align 4
   %sub6 = sub nsw i32 %2, %3
   %cmp7 = icmp slt i32 %sub6, 0
   %sub9 = sub nsw i32 0, %sub6
   %v.1 = select i1 %cmp7, i32 %sub9, i32 %sub6
   %add11 = add nsw i32 %add, %v.1
-  %arrayidx12 = getelementptr inbounds i32, i32* %p1.023, i64 2
-  %4 = load i32, i32* %arrayidx12, align 4
-  %arrayidx13 = getelementptr inbounds i32, i32* %p2.024, i64 2
-  %5 = load i32, i32* %arrayidx13, align 4
+  %arrayidx12 = getelementptr inbounds i32, ptr %p1.023, i64 2
+  %4 = load i32, ptr %arrayidx12, align 4
+  %arrayidx13 = getelementptr inbounds i32, ptr %p2.024, i64 2
+  %5 = load i32, ptr %arrayidx13, align 4
   %sub14 = sub nsw i32 %4, %5
   %cmp15 = icmp slt i32 %sub14, 0
   %sub17 = sub nsw i32 0, %sub14
   %sub17.sub14 = select i1 %cmp15, i32 %sub17, i32 %sub14
   %add19 = add nsw i32 %add11, %sub17.sub14
-  %arrayidx20 = getelementptr inbounds i32, i32* %p1.023, i64 3
-  %6 = load i32, i32* %arrayidx20, align 4
-  %arrayidx21 = getelementptr inbounds i32, i32* %p2.024, i64 3
-  %7 = load i32, i32* %arrayidx21, align 4
+  %arrayidx20 = getelementptr inbounds i32, ptr %p1.023, i64 3
+  %6 = load i32, ptr %arrayidx20, align 4
+  %arrayidx21 = getelementptr inbounds i32, ptr %p2.024, i64 3
+  %7 = load i32, ptr %arrayidx21, align 4
   %sub22 = sub nsw i32 %6, %7
   %cmp23 = icmp slt i32 %sub22, 0
   %sub25 = sub nsw i32 0, %sub22
   %v.3 = select i1 %cmp23, i32 %sub25, i32 %sub22
   %add27 = add nsw i32 %add19, %v.3
-  %add.ptr = getelementptr inbounds i32, i32* %p1.023, i64 %idx.ext
-  %add.ptr29 = getelementptr inbounds i32, i32* %p2.024, i64 %idx.ext
+  %add.ptr = getelementptr inbounds i32, ptr %p1.023, i64 %idx.ext
+  %add.ptr29 = getelementptr inbounds i32, ptr %p2.024, i64 %idx.ext
   %inc = add nuw nsw i32 %j.025, 1
   %exitcond = icmp eq i32 %inc, %h
   br i1 %exitcond, label %for.end.loopexit, label %for.body
@@ -121,16 +119,16 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ;;
 ;; int s = 0;
 ;; for (int j = 0; j < h; j++) {
 ;;   s += p1[0] * p2[0];
 ;;   s += p1[1] * p2[1];
 ;;   s += p1[2] * p2[2];
 ;;   s += p1[3] * p2[3];
 ;;   if (s >= lim)
 ;;      break;
 ;;   p1 += lx;
 ;;   p2 += lx;
 ;; }
-define i32 @reduction_with_br(i32* noalias nocapture readonly %blk1, i32* noalias nocapture readonly %blk2, i32 %lx, i32 %h, i32 %lim) {
+define i32 @reduction_with_br(ptr noalias nocapture readonly %blk1, ptr noalias nocapture readonly %blk2, i32 %lx, i32 %h, i32 %lim) {
 ; YAML:      --- !Passed
 ; YAML-NEXT: Pass:            slp-vectorizer
 ; YAML-NEXT: Name:            VectorizedHorizontalReduction
@@ -150,20 +148,18 @@ define i32 @reduction_with_br(i32* noalias nocapture readonly %blk1, i32* noalia
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[S_020:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[OP_RDX:%.*]], [[IF_END:%.*]] ]
 ; CHECK-NEXT:    [[J_019:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[IF_END]] ]
-; CHECK-NEXT:    [[P2_018:%.*]] = phi i32* [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR16:%.*]], [[IF_END]] ]
-; CHECK-NEXT:    [[P1_017:%.*]] = phi i32* [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[IF_END]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P1_017]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[P2_018]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[P2_018:%.*]] = phi ptr [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR16:%.*]], [[IF_END]] ]
+; CHECK-NEXT:    [[P1_017:%.*]] = phi ptr [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[IF_END]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[P1_017]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[P2_018]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP3]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
 ; CHECK-NEXT:    [[OP_RDX]] = add i32 [[TMP5]], [[S_020]]
 ; CHECK-NEXT:    [[CMP14:%.*]] = icmp slt i32 [[OP_RDX]], [[LIM:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP14]], label [[IF_END]], label [[FOR_END_LOOPEXIT:%.*]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i32, i32* [[P1_017]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[ADD_PTR16]] = getelementptr inbounds i32, i32* [[P2_018]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i32, ptr [[P1_017]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[ADD_PTR16]] = getelementptr inbounds i32, ptr [[P2_018]], i64 [[IDX_EXT]]
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[J_019]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[H]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]]
@@ -184,36 +180,36 @@ for.body.lr.ph:                                   ; preds = %entry
 for.body:                                         ; preds = %for.body.lr.ph, %if.end
   %s.020 = phi i32 [ 0, %for.body.lr.ph ], [ %add13, %if.end ]
   %j.019 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %if.end ]
-  %p2.018 = phi i32* [ %blk2, %for.body.lr.ph ], [ %add.ptr16, %if.end ]
-  %p1.017 = phi i32* [ %blk1, %for.body.lr.ph ], [ %add.ptr, %if.end ]
-  %0 = load i32, i32* %p1.017, align 4
-  %1 = load i32, i32* %p2.018, align 4
+  %p2.018 = phi ptr [ %blk2, %for.body.lr.ph ], [ %add.ptr16, %if.end ]
+  %p1.017 = phi ptr [ %blk1, %for.body.lr.ph ], [ %add.ptr, %if.end ]
+  %0 = load i32, ptr %p1.017, align 4
+  %1 = load i32, ptr %p2.018, align 4
   %mul = mul nsw i32 %1, %0
   %add = add nsw i32 %mul, %s.020
-  %arrayidx2 = getelementptr inbounds i32, i32* %p1.017, i64 1
-  %2 = load i32, i32* %arrayidx2, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32* %p2.018, i64 1
-  %3 = load i32, i32* %arrayidx3, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %p1.017, i64 1
+  %2 = load i32, ptr %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %p2.018, i64 1
+  %3 = load i32, ptr %arrayidx3, align 4
   %mul4 = mul nsw i32 %3, %2
   %add5 = add nsw i32 %add, %mul4
-  %arrayidx6 = getelementptr inbounds i32, i32* %p1.017, i64 2
-  %4 = load i32, i32* %arrayidx6, align 4
-  %arrayidx7 = getelementptr inbounds i32, i32* %p2.018, i64 2
-  %5 = load i32, i32* %arrayidx7, align 4
+  %arrayidx6 = getelementptr inbounds i32, ptr %p1.017, i64 2
+  %4 = load i32, ptr %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds i32, ptr %p2.018, i64 2
+  %5 = load i32, ptr %arrayidx7, align 4
   %mul8 = mul nsw i32 %5, %4
   %add9 = add nsw i32 %add5, %mul8
-  %arrayidx10 = getelementptr inbounds i32, i32* %p1.017, i64 3
-  %6 = load i32, i32* %arrayidx10, align 4
-  %arrayidx11 = getelementptr inbounds i32, i32* %p2.018, i64 3
-  %7 = load i32, i32* %arrayidx11, align 4
+  %arrayidx10 = getelementptr inbounds i32, ptr %p1.017, i64 3
+  %6 = load i32, ptr %arrayidx10, align 4
+  %arrayidx11 = getelementptr inbounds i32, ptr %p2.018, i64 3
+  %7 = load i32, ptr %arrayidx11, align 4
   %mul12 = mul nsw i32 %7, %6
   %add13 = add nsw i32 %add9, %mul12
   %cmp14 = icmp slt i32 %add13, %lim
   br i1 %cmp14, label %if.end, label %for.end.loopexit
 
 if.end:                                           ; preds = %for.body
-  %add.ptr = getelementptr inbounds i32, i32* %p1.017, i64 %idx.ext
-  %add.ptr16 = getelementptr inbounds i32, i32* %p2.018, i64 %idx.ext
+  %add.ptr = getelementptr inbounds i32, ptr %p1.017, i64 %idx.ext
+  %add.ptr16 = getelementptr inbounds i32, ptr %p2.018, i64 %idx.ext
   %inc = add nuw nsw i32 %j.019, 1
   %cmp = icmp slt i32 %inc, %h
   br i1 %cmp, label %for.body, label %for.end.loopexit
@@ -236,7 +232,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ; YAML-NEXT:   - String:          ' and with tree size '
 ; YAML-NEXT:   - TreeSize:        '10'
 
-define i32 @test_unrolled_select(i8* noalias nocapture readonly %blk1, i8* noalias nocapture readonly %blk2, i32 %lx, i32 %h, i32 %lim) #0 {
+define i32 @test_unrolled_select(ptr noalias nocapture readonly %blk1, ptr noalias nocapture readonly %blk2, i32 %lx, i32 %h, i32 %lim) #0 {
 ; CHECK-LABEL: @test_unrolled_select(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP_43:%.*]] = icmp sgt i32 [[H:%.*]], 0
@@ -247,13 +243,11 @@ define i32 @test_unrolled_select(i8* noalias nocapture readonly %blk1, i8* noali
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[S_047:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[OP_RDX:%.*]], [[IF_END_86:%.*]] ]
 ; CHECK-NEXT:    [[J_046:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[IF_END_86]] ]
-; CHECK-NEXT:    [[P2_045:%.*]] = phi i8* [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR88:%.*]], [[IF_END_86]] ]
-; CHECK-NEXT:    [[P1_044:%.*]] = phi i8* [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[IF_END_86]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[P1_044]] to <8 x i8>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1
+; CHECK-NEXT:    [[P2_045:%.*]] = phi ptr [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR88:%.*]], [[IF_END_86]] ]
+; CHECK-NEXT:    [[P1_044:%.*]] = phi ptr [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[IF_END_86]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[P1_044]], align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i32>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[P2_045]] to <8 x i8>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[TMP3]], align 1
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr [[P2_045]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext <8 x i8> [[TMP4]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw <8 x i32> [[TMP2]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp slt <8 x i32> [[TMP6]], zeroinitializer
@@ -264,8 +258,8 @@ define i32 @test_unrolled_select(i8* noalias nocapture readonly %blk1, i8* noali
 ; CHECK-NEXT:    [[CMP83:%.*]] = icmp slt i32 [[OP_RDX]], [[LIM:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP83]], label [[IF_END_86]], label [[FOR_END_LOOPEXIT:%.*]]
 ; CHECK:       if.end.86:
-; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i8, i8* [[P1_044]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[ADD_PTR88]] = getelementptr inbounds i8, i8* [[P2_045]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i8, ptr [[P1_044]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[ADD_PTR88]] = getelementptr inbounds i8, ptr [[P2_045]], i64 [[IDX_EXT]]
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[J_046]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[H]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]]
@@ -286,88 +280,88 @@ for.body.lr.ph:                                   ; preds = %entry
 for.body:                                         ; preds = %for.body.lr.ph, %if.end.86
   %s.047 = phi i32 [ 0, %for.body.lr.ph ], [ %add82, %if.end.86 ]
   %j.046 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %if.end.86 ]
-  %p2.045 = phi i8* [ %blk2, %for.body.lr.ph ], [ %add.ptr88, %if.end.86 ]
-  %p1.044 = phi i8* [ %blk1, %for.body.lr.ph ], [ %add.ptr, %if.end.86 ]
-  %0 = load i8, i8* %p1.044, align 1
+  %p2.045 = phi ptr [ %blk2, %for.body.lr.ph ], [ %add.ptr88, %if.end.86 ]
+  %p1.044 = phi ptr [ %blk1, %for.body.lr.ph ], [ %add.ptr, %if.end.86 ]
+  %0 = load i8, ptr %p1.044, align 1
   %conv = zext i8 %0 to i32
-  %1 = load i8, i8* %p2.045, align 1
+  %1 = load i8, ptr %p2.045, align 1
   %conv2 = zext i8 %1 to i32
   %sub = sub nsw i32 %conv, %conv2
   %cmp3 = icmp slt i32 %sub, 0
   %sub5 = sub nsw i32 0, %sub
   %sub5.sub = select i1 %cmp3, i32 %sub5, i32 %sub
   %add = add nsw i32 %sub5.sub, %s.047
-  %arrayidx6 = getelementptr inbounds i8, i8* %p1.044, i64 1
-  %2 = load i8, i8* %arrayidx6, align 1
+  %arrayidx6 = getelementptr inbounds i8, ptr %p1.044, i64 1
+  %2 = load i8, ptr %arrayidx6, align 1
   %conv7 = zext i8 %2 to i32
-  %arrayidx8 = getelementptr inbounds i8, i8* %p2.045, i64 1
-  %3 = load i8, i8* %arrayidx8, align 1
+  %arrayidx8 = getelementptr inbounds i8, ptr %p2.045, i64 1
+  %3 = load i8, ptr %arrayidx8, align 1
   %conv9 = zext i8 %3 to i32
   %sub10 = sub nsw i32 %conv7, %conv9
   %cmp11 = icmp slt i32 %sub10, 0
   %sub14 = sub nsw i32 0, %sub10
   %v.1 = select i1 %cmp11, i32 %sub14, i32 %sub10
   %add16 = add nsw i32 %add, %v.1
-  %arrayidx17 = getelementptr inbounds i8, i8* %p1.044, i64 2
-  %4 = load i8, i8* %arrayidx17, align 1
+  %arrayidx17 = getelementptr inbounds i8, ptr %p1.044, i64 2
+  %4 = load i8, ptr %arrayidx17, align 1
   %conv18 = zext i8 %4 to i32
-  %arrayidx19 = getelementptr inbounds i8, i8* %p2.045, i64 2
-  %5 = load i8, i8* %arrayidx19, align 1
+  %arrayidx19 = getelementptr inbounds i8, ptr %p2.045, i64 2
+  %5 = load i8, ptr %arrayidx19, align 1
   %conv20 = zext i8 %5 to i32
   %sub21 = sub nsw i32 %conv18, %conv20
   %cmp22 = icmp slt i32 %sub21, 0
   %sub25 = sub nsw i32 0, %sub21
   %sub25.sub21 = select i1 %cmp22, i32 %sub25, i32 %sub21
   %add27 = add nsw i32 %add16, %sub25.sub21
-  %arrayidx28 = getelementptr inbounds i8, i8* %p1.044, i64 3
-  %6 = load i8, i8* %arrayidx28, align 1
+  %arrayidx28 = getelementptr inbounds i8, ptr %p1.044, i64 3
+  %6 = load i8, ptr %arrayidx28, align 1
   %conv29 = zext i8 %6 to i32
-  %arrayidx30 = getelementptr inbounds i8, i8* %p2.045, i64 3
-  %7 = load i8, i8* %arrayidx30, align 1
+  %arrayidx30 = getelementptr inbounds i8, ptr %p2.045, i64 3
+  %7 = load i8, ptr %arrayidx30, align 1
   %conv31 = zext i8 %7 to i32
   %sub32 = sub nsw i32 %conv29, %conv31
   %cmp33 = icmp slt i32 %sub32, 0
   %sub36 = sub nsw i32 0, %sub32
   %v.3 = select i1 %cmp33, i32 %sub36, i32 %sub32
   %add38 = add nsw i32 %add27, %v.3
-  %arrayidx39 = getelementptr inbounds i8, i8* %p1.044, i64 4
-  %8 = load i8, i8* %arrayidx39, align 1
+  %arrayidx39 = getelementptr inbounds i8, ptr %p1.044, i64 4
+  %8 = load i8, ptr %arrayidx39, align 1
   %conv40 = zext i8 %8 to i32
-  %arrayidx41 = getelementptr inbounds i8, i8* %p2.045, i64 4
-  %9 = load i8, i8* %arrayidx41, align 1
+  %arrayidx41 = getelementptr inbounds i8, ptr %p2.045, i64 4
+  %9 = load i8, ptr %arrayidx41, align 1
   %conv42 = zext i8 %9 to i32
   %sub43 = sub nsw i32 %conv40, %conv42
   %cmp44 = icmp slt i32 %sub43, 0
   %sub47 = sub nsw i32 0, %sub43
   %sub47.sub43 = select i1 %cmp44, i32 %sub47, i32 %sub43
   %add49 = add nsw i32 %add38, %sub47.sub43
-  %arrayidx50 = getelementptr inbounds i8, i8* %p1.044, i64 5
-  %10 = load i8, i8* %arrayidx50, align 1
+  %arrayidx50 = getelementptr inbounds i8, ptr %p1.044, i64 5
+  %10 = load i8, ptr %arrayidx50, align 1
   %conv51 = zext i8 %10 to i32
-  %arrayidx52 = getelementptr inbounds i8, i8* %p2.045, i64 5
-  %11 = load i8, i8* %arrayidx52, align 1
+  %arrayidx52 = getelementptr inbounds i8, ptr %p2.045, i64 5
+  %11 = load i8, ptr %arrayidx52, align 1
   %conv53 = zext i8 %11 to i32
   %sub54 = sub nsw i32 %conv51, %conv53
   %cmp55 = icmp slt i32 %sub54, 0
   %sub58 = sub nsw i32 0, %sub54
   %v.5 = select i1 %cmp55, i32 %sub58, i32 %sub54
   %add60 = add nsw i32 %add49, %v.5
-  %arrayidx61 = getelementptr inbounds i8, i8* %p1.044, i64 6
-  %12 = load i8, i8* %arrayidx61, align 1
+  %arrayidx61 = getelementptr inbounds i8, ptr %p1.044, i64 6
+  %12 = load i8, ptr %arrayidx61, align 1
   %conv62 = zext i8 %12 to i32
-  %arrayidx63 = getelementptr inbounds i8, i8* %p2.045, i64 6
-  %13 = load i8, i8* %arrayidx63, align 1
+  %arrayidx63 = getelementptr inbounds i8, ptr %p2.045, i64 6
+  %13 = load i8, ptr %arrayidx63, align 1
   %conv64 = zext i8 %13 to i32
   %sub65 = sub nsw i32 %conv62, %conv64
   %cmp66 = icmp slt i32 %sub65, 0
   %sub69 = sub nsw i32 0, %sub65
   %sub69.sub65 = select i1 %cmp66, i32 %sub69, i32 %sub65
   %add71 = add nsw i32 %add60, %sub69.sub65
-  %arrayidx72 = getelementptr inbounds i8, i8* %p1.044, i64 7
-  %14 = load i8, i8* %arrayidx72, align 1
+  %arrayidx72 = getelementptr inbounds i8, ptr %p1.044, i64 7
+  %14 = load i8, ptr %arrayidx72, align 1
   %conv73 = zext i8 %14 to i32
-  %arrayidx74 = getelementptr inbounds i8, i8* %p2.045, i64 7
-  %15 = load i8, i8* %arrayidx74, align 1
+  %arrayidx74 = getelementptr inbounds i8, ptr %p2.045, i64 7
+  %15 = load i8, ptr %arrayidx74, align 1
   %conv75 = zext i8 %15 to i32
   %sub76 = sub nsw i32 %conv73, %conv75
   %cmp77 = icmp slt i32 %sub76, 0
@@ -378,8 +372,8 @@ for.body:                                         ; preds = %for.body.lr.ph, %if
   br i1 %cmp83, label %if.end.86, label %for.end.loopexit
 
 if.end.86:                                        ; preds = %for.body
-  %add.ptr = getelementptr inbounds i8, i8* %p1.044, i64 %idx.ext
-  %add.ptr88 = getelementptr inbounds i8, i8* %p2.045, i64 %idx.ext
+  %add.ptr = getelementptr inbounds i8, ptr %p1.044, i64 %idx.ext
+  %add.ptr88 = getelementptr inbounds i8, ptr %p2.045, i64 %idx.ext
   %inc = add nuw nsw i32 %j.046, 1
   %cmp = icmp slt i32 %inc, %h
   br i1 %cmp, label %for.body, label %for.end.loopexit

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/invalid_type.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/invalid_type.ll
index 3332467fe149e..f2d905697a943 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/invalid_type.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/invalid_type.ll
@@ -8,18 +8,18 @@ target triple = "aarch64-unknown-linux-gnu"
 
 define void @vector() {
 ; CHECK-LABEL: @vector(
-; CHECK-NEXT:    [[LOAD0:%.*]] = tail call <16 x i8> @vector.load(<16 x i8>* undef, i32 1)
-; CHECK-NEXT:    [[LOAD1:%.*]] = tail call <16 x i8> @vector.load(<16 x i8>* undef, i32 2)
+; CHECK-NEXT:    [[LOAD0:%.*]] = tail call <16 x i8> @vector.load(ptr undef, i32 1)
+; CHECK-NEXT:    [[LOAD1:%.*]] = tail call <16 x i8> @vector.load(ptr undef, i32 2)
 ; CHECK-NEXT:    [[ADD:%.*]] = add <16 x i8> [[LOAD1]], [[LOAD0]]
-; CHECK-NEXT:    tail call void @vector.store(<16 x i8> [[ADD]], <16 x i8>* undef, i32 1)
+; CHECK-NEXT:    tail call void @vector.store(<16 x i8> [[ADD]], ptr undef, i32 1)
 ; CHECK-NEXT:    ret void
 ;
-  %load0 = tail call <16 x i8> @vector.load(<16 x i8> *undef, i32 1)
-  %load1 = tail call <16 x i8> @vector.load(<16 x i8> *undef, i32 2)
+  %load0 = tail call <16 x i8> @vector.load(ptr undef, i32 1)
+  %load1 = tail call <16 x i8> @vector.load(ptr undef, i32 2)
   %add = add <16 x i8> %load1, %load0
-  tail call void @vector.store(<16 x i8> %add, <16 x i8>* undef, i32 1)
+  tail call void @vector.store(<16 x i8> %add, ptr undef, i32 1)
   ret void
 }
 
-declare <16 x i8> @vector.load(<16 x i8>*, i32)
-declare void @vector.store(<16 x i8>, <16 x i8>*, i32)
+declare <16 x i8> @vector.load(ptr, i32)
+declare void @vector.store(<16 x i8>, ptr, i32)

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/load-store-q.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/load-store-q.ll
index 61b679ce21bce..5f3ccc9e2336e 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/load-store-q.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/load-store-q.ll
@@ -12,14 +12,14 @@ target triple = "arm64-apple-ios5.0.0"
 ; CHECK: call void @g
 ; CHECK: store double
 ; CHECK: store double
-define void @f(double* %p, double* %q) {
-  %addr2 = getelementptr double, double* %q, i32 1
-  %addr = getelementptr double, double* %p, i32 1
-  %x = load double, double* %p
-  %y = load double, double* %addr
+define void @f(ptr %p, ptr %q) {
+  %addr2 = getelementptr double, ptr %q, i32 1
+  %addr = getelementptr double, ptr %p, i32 1
+  %x = load double, ptr %p
+  %y = load double, ptr %addr
   call void @g()
-  store double %x, double* %q
-  store double %y, double* %addr2
+  store double %x, ptr %q
+  store double %y, ptr %addr2
   ret void
 }
 declare void @g()
@@ -28,19 +28,19 @@ declare void @g()
 ;
 ; CHECK: store <2 x double>
 ; CHECK: load <2 x double>
-define void @f2(double* %p, double* %q) {
+define void @f2(ptr %p, ptr %q) {
 entry:
   br label %loop
 
 loop:
   %p1 = phi double [0.0, %entry], [%x, %loop]
   %p2 = phi double [0.0, %entry], [%y, %loop]
-  %addr2 = getelementptr double, double* %q, i32 1
-  %addr = getelementptr double, double* %p, i32 1
-  store double %p1, double* %q
-  store double %p2, double* %addr2
+  %addr2 = getelementptr double, ptr %q, i32 1
+  %addr = getelementptr double, ptr %p, i32 1
+  store double %p1, ptr %q
+  store double %p2, ptr %addr2
 
-  %x = load double, double* %p
-  %y = load double, double* %addr
+  %x = load double, ptr %p
+  %y = load double, ptr %addr
   br label %loop
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
index 3297d0306b595..94d12f23456f9 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
@@ -4,52 +4,52 @@
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64"
 
-define i16 @reduce_allstrided(i16* nocapture noundef readonly %x, i16* nocapture noundef readonly %y, i32 noundef %stride) {
+define i16 @reduce_allstrided(ptr nocapture noundef readonly %x, ptr nocapture noundef readonly %y, i32 noundef %stride) {
 ; CHECK-LABEL: @reduce_allstrided(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[X:%.*]], align 2
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[X:%.*]], align 2
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[STRIDE:%.*]] to i64
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2
 ; CHECK-NEXT:    [[MUL2:%.*]] = shl nsw i32 [[STRIDE]], 1
 ; CHECK-NEXT:    [[IDXPROM3:%.*]] = sext i32 [[MUL2]] to i64
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM3]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX4]], align 2
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[IDXPROM3]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX4]], align 2
 ; CHECK-NEXT:    [[MUL5:%.*]] = mul nsw i32 [[STRIDE]], 3
 ; CHECK-NEXT:    [[IDXPROM6:%.*]] = sext i32 [[MUL5]] to i64
-; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM6]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[IDXPROM6]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX7]], align 2
 ; CHECK-NEXT:    [[MUL8:%.*]] = shl nsw i32 [[STRIDE]], 2
 ; CHECK-NEXT:    [[IDXPROM9:%.*]] = sext i32 [[MUL8]] to i64
-; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM9]]
-; CHECK-NEXT:    [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[IDXPROM9]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
 ; CHECK-NEXT:    [[MUL11:%.*]] = mul nsw i32 [[STRIDE]], 5
 ; CHECK-NEXT:    [[IDXPROM12:%.*]] = sext i32 [[MUL11]] to i64
-; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM12]]
-; CHECK-NEXT:    [[TMP5:%.*]] = load i16, i16* [[ARRAYIDX13]], align 2
+; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[IDXPROM12]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX13]], align 2
 ; CHECK-NEXT:    [[MUL14:%.*]] = mul nsw i32 [[STRIDE]], 6
 ; CHECK-NEXT:    [[IDXPROM15:%.*]] = sext i32 [[MUL14]] to i64
-; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM15]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX16]], align 2
+; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[IDXPROM15]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i16, ptr [[ARRAYIDX16]], align 2
 ; CHECK-NEXT:    [[MUL17:%.*]] = mul nsw i32 [[STRIDE]], 7
 ; CHECK-NEXT:    [[IDXPROM18:%.*]] = sext i32 [[MUL17]] to i64
-; CHECK-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM18]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX19]], align 2
-; CHECK-NEXT:    [[TMP8:%.*]] = load i16, i16* [[Y:%.*]], align 2
-; CHECK-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX23]], align 2
-; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM3]]
-; CHECK-NEXT:    [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX26]], align 2
-; CHECK-NEXT:    [[ARRAYIDX29:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM6]]
-; CHECK-NEXT:    [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX29]], align 2
-; CHECK-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM9]]
-; CHECK-NEXT:    [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX32]], align 2
-; CHECK-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM12]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load i16, i16* [[ARRAYIDX35]], align 2
-; CHECK-NEXT:    [[ARRAYIDX38:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM15]]
-; CHECK-NEXT:    [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX38]], align 2
-; CHECK-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM18]]
-; CHECK-NEXT:    [[TMP15:%.*]] = load i16, i16* [[ARRAYIDX41]], align 2
+; CHECK-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[IDXPROM18]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2
+; CHECK-NEXT:    [[TMP8:%.*]] = load i16, ptr [[Y:%.*]], align 2
+; CHECK-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, ptr [[Y]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX23]], align 2
+; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds i16, ptr [[Y]], i64 [[IDXPROM3]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX26]], align 2
+; CHECK-NEXT:    [[ARRAYIDX29:%.*]] = getelementptr inbounds i16, ptr [[Y]], i64 [[IDXPROM6]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i16, ptr [[ARRAYIDX29]], align 2
+; CHECK-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i16, ptr [[Y]], i64 [[IDXPROM9]]
+; CHECK-NEXT:    [[TMP12:%.*]] = load i16, ptr [[ARRAYIDX32]], align 2
+; CHECK-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds i16, ptr [[Y]], i64 [[IDXPROM12]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX35]], align 2
+; CHECK-NEXT:    [[ARRAYIDX38:%.*]] = getelementptr inbounds i16, ptr [[Y]], i64 [[IDXPROM15]]
+; CHECK-NEXT:    [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX38]], align 2
+; CHECK-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds i16, ptr [[Y]], i64 [[IDXPROM18]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i16, ptr [[ARRAYIDX41]], align 2
 ; CHECK-NEXT:    [[MUL43:%.*]] = mul i16 [[TMP8]], [[TMP0]]
 ; CHECK-NEXT:    [[MUL48:%.*]] = mul i16 [[TMP9]], [[TMP1]]
 ; CHECK-NEXT:    [[ADD49:%.*]] = add i16 [[MUL48]], [[MUL43]]
@@ -68,49 +68,49 @@ define i16 @reduce_allstrided(i16* nocapture noundef readonly %x, i16* nocapture
 ; CHECK-NEXT:    ret i16 [[ADD85]]
 ;
 entry:
-  %0 = load i16, i16* %x, align 2
+  %0 = load i16, ptr %x, align 2
   %idxprom = sext i32 %stride to i64
-  %arrayidx1 = getelementptr inbounds i16, i16* %x, i64 %idxprom
-  %1 = load i16, i16* %arrayidx1, align 2
+  %arrayidx1 = getelementptr inbounds i16, ptr %x, i64 %idxprom
+  %1 = load i16, ptr %arrayidx1, align 2
   %mul2 = shl nsw i32 %stride, 1
   %idxprom3 = sext i32 %mul2 to i64
-  %arrayidx4 = getelementptr inbounds i16, i16* %x, i64 %idxprom3
-  %2 = load i16, i16* %arrayidx4, align 2
+  %arrayidx4 = getelementptr inbounds i16, ptr %x, i64 %idxprom3
+  %2 = load i16, ptr %arrayidx4, align 2
   %mul5 = mul nsw i32 %stride, 3
   %idxprom6 = sext i32 %mul5 to i64
-  %arrayidx7 = getelementptr inbounds i16, i16* %x, i64 %idxprom6
-  %3 = load i16, i16* %arrayidx7, align 2
+  %arrayidx7 = getelementptr inbounds i16, ptr %x, i64 %idxprom6
+  %3 = load i16, ptr %arrayidx7, align 2
   %mul8 = shl nsw i32 %stride, 2
   %idxprom9 = sext i32 %mul8 to i64
-  %arrayidx10 = getelementptr inbounds i16, i16* %x, i64 %idxprom9
-  %4 = load i16, i16* %arrayidx10, align 2
+  %arrayidx10 = getelementptr inbounds i16, ptr %x, i64 %idxprom9
+  %4 = load i16, ptr %arrayidx10, align 2
   %mul11 = mul nsw i32 %stride, 5
   %idxprom12 = sext i32 %mul11 to i64
-  %arrayidx13 = getelementptr inbounds i16, i16* %x, i64 %idxprom12
-  %5 = load i16, i16* %arrayidx13, align 2
+  %arrayidx13 = getelementptr inbounds i16, ptr %x, i64 %idxprom12
+  %5 = load i16, ptr %arrayidx13, align 2
   %mul14 = mul nsw i32 %stride, 6
   %idxprom15 = sext i32 %mul14 to i64
-  %arrayidx16 = getelementptr inbounds i16, i16* %x, i64 %idxprom15
-  %6 = load i16, i16* %arrayidx16, align 2
+  %arrayidx16 = getelementptr inbounds i16, ptr %x, i64 %idxprom15
+  %6 = load i16, ptr %arrayidx16, align 2
   %mul17 = mul nsw i32 %stride, 7
   %idxprom18 = sext i32 %mul17 to i64
-  %arrayidx19 = getelementptr inbounds i16, i16* %x, i64 %idxprom18
-  %7 = load i16, i16* %arrayidx19, align 2
-  %8 = load i16, i16* %y, align 2
-  %arrayidx23 = getelementptr inbounds i16, i16* %y, i64 %idxprom
-  %9 = load i16, i16* %arrayidx23, align 2
-  %arrayidx26 = getelementptr inbounds i16, i16* %y, i64 %idxprom3
-  %10 = load i16, i16* %arrayidx26, align 2
-  %arrayidx29 = getelementptr inbounds i16, i16* %y, i64 %idxprom6
-  %11 = load i16, i16* %arrayidx29, align 2
-  %arrayidx32 = getelementptr inbounds i16, i16* %y, i64 %idxprom9
-  %12 = load i16, i16* %arrayidx32, align 2
-  %arrayidx35 = getelementptr inbounds i16, i16* %y, i64 %idxprom12
-  %13 = load i16, i16* %arrayidx35, align 2
-  %arrayidx38 = getelementptr inbounds i16, i16* %y, i64 %idxprom15
-  %14 = load i16, i16* %arrayidx38, align 2
-  %arrayidx41 = getelementptr inbounds i16, i16* %y, i64 %idxprom18
-  %15 = load i16, i16* %arrayidx41, align 2
+  %arrayidx19 = getelementptr inbounds i16, ptr %x, i64 %idxprom18
+  %7 = load i16, ptr %arrayidx19, align 2
+  %8 = load i16, ptr %y, align 2
+  %arrayidx23 = getelementptr inbounds i16, ptr %y, i64 %idxprom
+  %9 = load i16, ptr %arrayidx23, align 2
+  %arrayidx26 = getelementptr inbounds i16, ptr %y, i64 %idxprom3
+  %10 = load i16, ptr %arrayidx26, align 2
+  %arrayidx29 = getelementptr inbounds i16, ptr %y, i64 %idxprom6
+  %11 = load i16, ptr %arrayidx29, align 2
+  %arrayidx32 = getelementptr inbounds i16, ptr %y, i64 %idxprom9
+  %12 = load i16, ptr %arrayidx32, align 2
+  %arrayidx35 = getelementptr inbounds i16, ptr %y, i64 %idxprom12
+  %13 = load i16, ptr %arrayidx35, align 2
+  %arrayidx38 = getelementptr inbounds i16, ptr %y, i64 %idxprom15
+  %14 = load i16, ptr %arrayidx38, align 2
+  %arrayidx41 = getelementptr inbounds i16, ptr %y, i64 %idxprom18
+  %15 = load i16, ptr %arrayidx41, align 2
   %mul43 = mul i16 %8, %0
   %mul48 = mul i16 %9, %1
   %add49 = add i16 %mul48, %mul43
@@ -129,50 +129,50 @@ entry:
   ret i16 %add85
 }
 
-define i16 @reduce_blockstrided2(i16* nocapture noundef readonly %x, i16* nocapture noundef readonly %y, i32 noundef %stride) {
+define i16 @reduce_blockstrided2(ptr nocapture noundef readonly %x, ptr nocapture noundef readonly %y, i32 noundef %stride) {
 ; CHECK-LABEL: @reduce_blockstrided2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[X:%.*]], align 2
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[X:%.*]], align 2
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[STRIDE:%.*]] to i64
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX2]], align 2
 ; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[STRIDE]], 1
 ; CHECK-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[ADD3]] to i64
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM4]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX5]], align 2
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[IDXPROM4]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX5]], align 2
 ; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i32 [[STRIDE]], 1
 ; CHECK-NEXT:    [[IDXPROM7:%.*]] = sext i32 [[MUL]] to i64
-; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM7]]
-; CHECK-NEXT:    [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX8]], align 2
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[IDXPROM7]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX8]], align 2
 ; CHECK-NEXT:    [[ADD10:%.*]] = or i32 [[MUL]], 1
 ; CHECK-NEXT:    [[IDXPROM11:%.*]] = sext i32 [[ADD10]] to i64
-; CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM11]]
-; CHECK-NEXT:    [[TMP5:%.*]] = load i16, i16* [[ARRAYIDX12]], align 2
+; CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[IDXPROM11]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX12]], align 2
 ; CHECK-NEXT:    [[MUL13:%.*]] = mul nsw i32 [[STRIDE]], 3
 ; CHECK-NEXT:    [[IDXPROM15:%.*]] = sext i32 [[MUL13]] to i64
-; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM15]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX16]], align 2
+; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[IDXPROM15]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i16, ptr [[ARRAYIDX16]], align 2
 ; CHECK-NEXT:    [[ADD18:%.*]] = add nsw i32 [[MUL13]], 1
 ; CHECK-NEXT:    [[IDXPROM19:%.*]] = sext i32 [[ADD18]] to i64
-; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM19]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX20]], align 2
-; CHECK-NEXT:    [[TMP8:%.*]] = load i16, i16* [[Y:%.*]], align 2
-; CHECK-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX24]], align 2
-; CHECK-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM7]]
-; CHECK-NEXT:    [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX28]], align 2
-; CHECK-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM15]]
-; CHECK-NEXT:    [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX32]], align 2
-; CHECK-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 1
-; CHECK-NEXT:    [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX33]], align 2
-; CHECK-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM4]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load i16, i16* [[ARRAYIDX36]], align 2
-; CHECK-NEXT:    [[ARRAYIDX40:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM11]]
-; CHECK-NEXT:    [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX40]], align 2
-; CHECK-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM19]]
-; CHECK-NEXT:    [[TMP15:%.*]] = load i16, i16* [[ARRAYIDX44]], align 2
+; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[IDXPROM19]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX20]], align 2
+; CHECK-NEXT:    [[TMP8:%.*]] = load i16, ptr [[Y:%.*]], align 2
+; CHECK-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds i16, ptr [[Y]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX24]], align 2
+; CHECK-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[Y]], i64 [[IDXPROM7]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX28]], align 2
+; CHECK-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i16, ptr [[Y]], i64 [[IDXPROM15]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i16, ptr [[ARRAYIDX32]], align 2
+; CHECK-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds i16, ptr [[Y]], i64 1
+; CHECK-NEXT:    [[TMP12:%.*]] = load i16, ptr [[ARRAYIDX33]], align 2
+; CHECK-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds i16, ptr [[Y]], i64 [[IDXPROM4]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX36]], align 2
+; CHECK-NEXT:    [[ARRAYIDX40:%.*]] = getelementptr inbounds i16, ptr [[Y]], i64 [[IDXPROM11]]
+; CHECK-NEXT:    [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX40]], align 2
+; CHECK-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds i16, ptr [[Y]], i64 [[IDXPROM19]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i16, ptr [[ARRAYIDX44]], align 2
 ; CHECK-NEXT:    [[MUL46:%.*]] = mul i16 [[TMP8]], [[TMP0]]
 ; CHECK-NEXT:    [[MUL52:%.*]] = mul i16 [[TMP12]], [[TMP1]]
 ; CHECK-NEXT:    [[MUL58:%.*]] = mul i16 [[TMP9]], [[TMP2]]
@@ -191,47 +191,47 @@ define i16 @reduce_blockstrided2(i16* nocapture noundef readonly %x, i16* nocapt
 ; CHECK-NEXT:    ret i16 [[ADD89]]
 ;
 entry:
-  %0 = load i16, i16* %x, align 2
-  %arrayidx1 = getelementptr inbounds i16, i16* %x, i64 1
-  %1 = load i16, i16* %arrayidx1, align 2
+  %0 = load i16, ptr %x, align 2
+  %arrayidx1 = getelementptr inbounds i16, ptr %x, i64 1
+  %1 = load i16, ptr %arrayidx1, align 2
   %idxprom = sext i32 %stride to i64
-  %arrayidx2 = getelementptr inbounds i16, i16* %x, i64 %idxprom
-  %2 = load i16, i16* %arrayidx2, align 2
+  %arrayidx2 = getelementptr inbounds i16, ptr %x, i64 %idxprom
+  %2 = load i16, ptr %arrayidx2, align 2
   %add3 = add nsw i32 %stride, 1
   %idxprom4 = sext i32 %add3 to i64
-  %arrayidx5 = getelementptr inbounds i16, i16* %x, i64 %idxprom4
-  %3 = load i16, i16* %arrayidx5, align 2
+  %arrayidx5 = getelementptr inbounds i16, ptr %x, i64 %idxprom4
+  %3 = load i16, ptr %arrayidx5, align 2
   %mul = shl nsw i32 %stride, 1
   %idxprom7 = sext i32 %mul to i64
-  %arrayidx8 = getelementptr inbounds i16, i16* %x, i64 %idxprom7
-  %4 = load i16, i16* %arrayidx8, align 2
+  %arrayidx8 = getelementptr inbounds i16, ptr %x, i64 %idxprom7
+  %4 = load i16, ptr %arrayidx8, align 2
   %add10 = or i32 %mul, 1
   %idxprom11 = sext i32 %add10 to i64
-  %arrayidx12 = getelementptr inbounds i16, i16* %x, i64 %idxprom11
-  %5 = load i16, i16* %arrayidx12, align 2
+  %arrayidx12 = getelementptr inbounds i16, ptr %x, i64 %idxprom11
+  %5 = load i16, ptr %arrayidx12, align 2
   %mul13 = mul nsw i32 %stride, 3
   %idxprom15 = sext i32 %mul13 to i64
-  %arrayidx16 = getelementptr inbounds i16, i16* %x, i64 %idxprom15
-  %6 = load i16, i16* %arrayidx16, align 2
+  %arrayidx16 = getelementptr inbounds i16, ptr %x, i64 %idxprom15
+  %6 = load i16, ptr %arrayidx16, align 2
   %add18 = add nsw i32 %mul13, 1
   %idxprom19 = sext i32 %add18 to i64
-  %arrayidx20 = getelementptr inbounds i16, i16* %x, i64 %idxprom19
-  %7 = load i16, i16* %arrayidx20, align 2
-  %8 = load i16, i16* %y, align 2
-  %arrayidx24 = getelementptr inbounds i16, i16* %y, i64 %idxprom
-  %9 = load i16, i16* %arrayidx24, align 2
-  %arrayidx28 = getelementptr inbounds i16, i16* %y, i64 %idxprom7
-  %10 = load i16, i16* %arrayidx28, align 2
-  %arrayidx32 = getelementptr inbounds i16, i16* %y, i64 %idxprom15
-  %11 = load i16, i16* %arrayidx32, align 2
-  %arrayidx33 = getelementptr inbounds i16, i16* %y, i64 1
-  %12 = load i16, i16* %arrayidx33, align 2
-  %arrayidx36 = getelementptr inbounds i16, i16* %y, i64 %idxprom4
-  %13 = load i16, i16* %arrayidx36, align 2
-  %arrayidx40 = getelementptr inbounds i16, i16* %y, i64 %idxprom11
-  %14 = load i16, i16* %arrayidx40, align 2
-  %arrayidx44 = getelementptr inbounds i16, i16* %y, i64 %idxprom19
-  %15 = load i16, i16* %arrayidx44, align 2
+  %arrayidx20 = getelementptr inbounds i16, ptr %x, i64 %idxprom19
+  %7 = load i16, ptr %arrayidx20, align 2
+  %8 = load i16, ptr %y, align 2
+  %arrayidx24 = getelementptr inbounds i16, ptr %y, i64 %idxprom
+  %9 = load i16, ptr %arrayidx24, align 2
+  %arrayidx28 = getelementptr inbounds i16, ptr %y, i64 %idxprom7
+  %10 = load i16, ptr %arrayidx28, align 2
+  %arrayidx32 = getelementptr inbounds i16, ptr %y, i64 %idxprom15
+  %11 = load i16, ptr %arrayidx32, align 2
+  %arrayidx33 = getelementptr inbounds i16, ptr %y, i64 1
+  %12 = load i16, ptr %arrayidx33, align 2
+  %arrayidx36 = getelementptr inbounds i16, ptr %y, i64 %idxprom4
+  %13 = load i16, ptr %arrayidx36, align 2
+  %arrayidx40 = getelementptr inbounds i16, ptr %y, i64 %idxprom11
+  %14 = load i16, ptr %arrayidx40, align 2
+  %arrayidx44 = getelementptr inbounds i16, ptr %y, i64 %idxprom19
+  %15 = load i16, ptr %arrayidx44, align 2
   %mul46 = mul i16 %8, %0
   %mul52 = mul i16 %12, %1
   %mul58 = mul i16 %9, %2
@@ -250,36 +250,36 @@ entry:
   ret i16 %add89
 }
 
-define i16 @reduce_blockstrided3(i16* nocapture noundef readonly %x, i16* nocapture noundef readonly %y, i32 noundef %stride) {
+define i16 @reduce_blockstrided3(ptr nocapture noundef readonly %x, ptr nocapture noundef readonly %y, i32 noundef %stride) {
 ; CHECK-LABEL: @reduce_blockstrided3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[L0:%.*]] = load i16, i16* [[X:%.*]], align 2
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 1
-; CHECK-NEXT:    [[L1:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 2
-; CHECK-NEXT:    [[L2:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+; CHECK-NEXT:    [[L0:%.*]] = load i16, ptr [[X:%.*]], align 2
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 1
+; CHECK-NEXT:    [[L1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 2
+; CHECK-NEXT:    [[L2:%.*]] = load i16, ptr [[ARRAYIDX2]], align 2
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[STRIDE:%.*]] to i64
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[L4:%.*]] = load i16, i16* [[ARRAYIDX4]], align 2
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[L4:%.*]] = load i16, ptr [[ARRAYIDX4]], align 2
 ; CHECK-NEXT:    [[ADD5:%.*]] = add nsw i32 [[STRIDE]], 1
 ; CHECK-NEXT:    [[IDXPROM6:%.*]] = sext i32 [[ADD5]] to i64
-; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM6]]
-; CHECK-NEXT:    [[L5:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[IDXPROM6]]
+; CHECK-NEXT:    [[L5:%.*]] = load i16, ptr [[ARRAYIDX7]], align 2
 ; CHECK-NEXT:    [[ADD8:%.*]] = add nsw i32 [[STRIDE]], 2
 ; CHECK-NEXT:    [[IDXPROM9:%.*]] = sext i32 [[ADD8]] to i64
-; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM9]]
-; CHECK-NEXT:    [[L6:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
-; CHECK-NEXT:    [[L8:%.*]] = load i16, i16* [[Y:%.*]], align 2
-; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 1
-; CHECK-NEXT:    [[L9:%.*]] = load i16, i16* [[ARRAYIDX15]], align 2
-; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 2
-; CHECK-NEXT:    [[L10:%.*]] = load i16, i16* [[ARRAYIDX16]], align 2
-; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[L12:%.*]] = load i16, i16* [[ARRAYIDX20]], align 2
-; CHECK-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM6]]
-; CHECK-NEXT:    [[L13:%.*]] = load i16, i16* [[ARRAYIDX23]], align 2
-; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM9]]
-; CHECK-NEXT:    [[L14:%.*]] = load i16, i16* [[ARRAYIDX26]], align 2
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[IDXPROM9]]
+; CHECK-NEXT:    [[L6:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
+; CHECK-NEXT:    [[L8:%.*]] = load i16, ptr [[Y:%.*]], align 2
+; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i16, ptr [[Y]], i64 1
+; CHECK-NEXT:    [[L9:%.*]] = load i16, ptr [[ARRAYIDX15]], align 2
+; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i16, ptr [[Y]], i64 2
+; CHECK-NEXT:    [[L10:%.*]] = load i16, ptr [[ARRAYIDX16]], align 2
+; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, ptr [[Y]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[L12:%.*]] = load i16, ptr [[ARRAYIDX20]], align 2
+; CHECK-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, ptr [[Y]], i64 [[IDXPROM6]]
+; CHECK-NEXT:    [[L13:%.*]] = load i16, ptr [[ARRAYIDX23]], align 2
+; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds i16, ptr [[Y]], i64 [[IDXPROM9]]
+; CHECK-NEXT:    [[L14:%.*]] = load i16, ptr [[ARRAYIDX26]], align 2
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i16 [[L8]], [[L0]]
 ; CHECK-NEXT:    [[MUL36:%.*]] = mul i16 [[L9]], [[L1]]
 ; CHECK-NEXT:    [[ADD37:%.*]] = add i16 [[MUL36]], [[MUL]]
@@ -294,35 +294,35 @@ define i16 @reduce_blockstrided3(i16* nocapture noundef readonly %x, i16* nocapt
 ; CHECK-NEXT:    ret i16 [[ADD73]]
 ;
 entry:
-  %l0 = load i16, i16* %x, align 2
-  %arrayidx1 = getelementptr inbounds i16, i16* %x, i64 1
-  %l1 = load i16, i16* %arrayidx1, align 2
-  %arrayidx2 = getelementptr inbounds i16, i16* %x, i64 2
-  %l2 = load i16, i16* %arrayidx2, align 2
+  %l0 = load i16, ptr %x, align 2
+  %arrayidx1 = getelementptr inbounds i16, ptr %x, i64 1
+  %l1 = load i16, ptr %arrayidx1, align 2
+  %arrayidx2 = getelementptr inbounds i16, ptr %x, i64 2
+  %l2 = load i16, ptr %arrayidx2, align 2
   %idxprom = sext i32 %stride to i64
-  %arrayidx4 = getelementptr inbounds i16, i16* %x, i64 %idxprom
-  %l4 = load i16, i16* %arrayidx4, align 2
+  %arrayidx4 = getelementptr inbounds i16, ptr %x, i64 %idxprom
+  %l4 = load i16, ptr %arrayidx4, align 2
   %add5 = add nsw i32 %stride, 1
   %idxprom6 = sext i32 %add5 to i64
-  %arrayidx7 = getelementptr inbounds i16, i16* %x, i64 %idxprom6
-  %l5 = load i16, i16* %arrayidx7, align 2
+  %arrayidx7 = getelementptr inbounds i16, ptr %x, i64 %idxprom6
+  %l5 = load i16, ptr %arrayidx7, align 2
   %add8 = add nsw i32 %stride, 2
   %idxprom9 = sext i32 %add8 to i64
-  %arrayidx10 = getelementptr inbounds i16, i16* %x, i64 %idxprom9
-  %l6 = load i16, i16* %arrayidx10, align 2
+  %arrayidx10 = getelementptr inbounds i16, ptr %x, i64 %idxprom9
+  %l6 = load i16, ptr %arrayidx10, align 2
   %add11 = add nsw i32 %stride, 3
   %idxprom12 = sext i32 %add11 to i64
-  %l8 = load i16, i16* %y, align 2
-  %arrayidx15 = getelementptr inbounds i16, i16* %y, i64 1
-  %l9 = load i16, i16* %arrayidx15, align 2
-  %arrayidx16 = getelementptr inbounds i16, i16* %y, i64 2
-  %l10 = load i16, i16* %arrayidx16, align 2
-  %arrayidx20 = getelementptr inbounds i16, i16* %y, i64 %idxprom
-  %l12 = load i16, i16* %arrayidx20, align 2
-  %arrayidx23 = getelementptr inbounds i16, i16* %y, i64 %idxprom6
-  %l13 = load i16, i16* %arrayidx23, align 2
-  %arrayidx26 = getelementptr inbounds i16, i16* %y, i64 %idxprom9
-  %l14 = load i16, i16* %arrayidx26, align 2
+  %l8 = load i16, ptr %y, align 2
+  %arrayidx15 = getelementptr inbounds i16, ptr %y, i64 1
+  %l9 = load i16, ptr %arrayidx15, align 2
+  %arrayidx16 = getelementptr inbounds i16, ptr %y, i64 2
+  %l10 = load i16, ptr %arrayidx16, align 2
+  %arrayidx20 = getelementptr inbounds i16, ptr %y, i64 %idxprom
+  %l12 = load i16, ptr %arrayidx20, align 2
+  %arrayidx23 = getelementptr inbounds i16, ptr %y, i64 %idxprom6
+  %l13 = load i16, ptr %arrayidx23, align 2
+  %arrayidx26 = getelementptr inbounds i16, ptr %y, i64 %idxprom9
+  %l14 = load i16, ptr %arrayidx26, align 2
   %mul = mul i16 %l8, %l0
   %mul36 = mul i16 %l9, %l1
   %add37 = add i16 %mul36, %mul
@@ -337,20 +337,16 @@ entry:
   ret i16 %add73
 }
 
-define i16 @reduce_blockstrided4(i16* nocapture noundef readonly %x, i16* nocapture noundef readonly %y, i32 noundef %stride) {
+define i16 @reduce_blockstrided4(ptr nocapture noundef readonly %x, ptr nocapture noundef readonly %y, i32 noundef %stride) {
 ; CHECK-LABEL: @reduce_blockstrided4(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[STRIDE:%.*]] to i64
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[X]] to <4 x i16>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 2
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[ARRAYIDX4]] to <4 x i16>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[TMP2]], align 2
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[Y]] to <4 x i16>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[TMP4]], align 2
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i16* [[ARRAYIDX20]] to <4 x i16>*
-; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[TMP6]], align 2
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[X]], align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 2
+; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i16>, ptr [[Y]], align 2
+; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX20]], align 2
 ; CHECK-NEXT:    [[TMP8:%.*]] = mul <4 x i16> [[TMP5]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul <4 x i16> [[TMP7]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -358,43 +354,43 @@ define i16 @reduce_blockstrided4(i16* nocapture noundef readonly %x, i16* nocapt
 ; CHECK-NEXT:    ret i16 [[TMP11]]
 ;
 entry:
-  %0 = load i16, i16* %x, align 2
-  %arrayidx1 = getelementptr inbounds i16, i16* %x, i64 1
-  %1 = load i16, i16* %arrayidx1, align 2
-  %arrayidx2 = getelementptr inbounds i16, i16* %x, i64 2
-  %2 = load i16, i16* %arrayidx2, align 2
-  %arrayidx3 = getelementptr inbounds i16, i16* %x, i64 3
-  %3 = load i16, i16* %arrayidx3, align 2
+  %0 = load i16, ptr %x, align 2
+  %arrayidx1 = getelementptr inbounds i16, ptr %x, i64 1
+  %1 = load i16, ptr %arrayidx1, align 2
+  %arrayidx2 = getelementptr inbounds i16, ptr %x, i64 2
+  %2 = load i16, ptr %arrayidx2, align 2
+  %arrayidx3 = getelementptr inbounds i16, ptr %x, i64 3
+  %3 = load i16, ptr %arrayidx3, align 2
   %idxprom = sext i32 %stride to i64
-  %arrayidx4 = getelementptr inbounds i16, i16* %x, i64 %idxprom
-  %4 = load i16, i16* %arrayidx4, align 2
+  %arrayidx4 = getelementptr inbounds i16, ptr %x, i64 %idxprom
+  %4 = load i16, ptr %arrayidx4, align 2
   %add5 = add nsw i32 %stride, 1
   %idxprom6 = sext i32 %add5 to i64
-  %arrayidx7 = getelementptr inbounds i16, i16* %x, i64 %idxprom6
-  %5 = load i16, i16* %arrayidx7, align 2
+  %arrayidx7 = getelementptr inbounds i16, ptr %x, i64 %idxprom6
+  %5 = load i16, ptr %arrayidx7, align 2
   %add8 = add nsw i32 %stride, 2
   %idxprom9 = sext i32 %add8 to i64
-  %arrayidx10 = getelementptr inbounds i16, i16* %x, i64 %idxprom9
-  %6 = load i16, i16* %arrayidx10, align 2
+  %arrayidx10 = getelementptr inbounds i16, ptr %x, i64 %idxprom9
+  %6 = load i16, ptr %arrayidx10, align 2
   %add11 = add nsw i32 %stride, 3
   %idxprom12 = sext i32 %add11 to i64
-  %arrayidx13 = getelementptr inbounds i16, i16* %x, i64 %idxprom12
-  %7 = load i16, i16* %arrayidx13, align 2
-  %8 = load i16, i16* %y, align 2
-  %arrayidx15 = getelementptr inbounds i16, i16* %y, i64 1
-  %9 = load i16, i16* %arrayidx15, align 2
-  %arrayidx16 = getelementptr inbounds i16, i16* %y, i64 2
-  %10 = load i16, i16* %arrayidx16, align 2
-  %arrayidx17 = getelementptr inbounds i16, i16* %y, i64 3
-  %11 = load i16, i16* %arrayidx17, align 2
-  %arrayidx20 = getelementptr inbounds i16, i16* %y, i64 %idxprom
-  %12 = load i16, i16* %arrayidx20, align 2
-  %arrayidx23 = getelementptr inbounds i16, i16* %y, i64 %idxprom6
-  %13 = load i16, i16* %arrayidx23, align 2
-  %arrayidx26 = getelementptr inbounds i16, i16* %y, i64 %idxprom9
-  %14 = load i16, i16* %arrayidx26, align 2
-  %arrayidx29 = getelementptr inbounds i16, i16* %y, i64 %idxprom12
-  %15 = load i16, i16* %arrayidx29, align 2
+  %arrayidx13 = getelementptr inbounds i16, ptr %x, i64 %idxprom12
+  %7 = load i16, ptr %arrayidx13, align 2
+  %8 = load i16, ptr %y, align 2
+  %arrayidx15 = getelementptr inbounds i16, ptr %y, i64 1
+  %9 = load i16, ptr %arrayidx15, align 2
+  %arrayidx16 = getelementptr inbounds i16, ptr %y, i64 2
+  %10 = load i16, ptr %arrayidx16, align 2
+  %arrayidx17 = getelementptr inbounds i16, ptr %y, i64 3
+  %11 = load i16, ptr %arrayidx17, align 2
+  %arrayidx20 = getelementptr inbounds i16, ptr %y, i64 %idxprom
+  %12 = load i16, ptr %arrayidx20, align 2
+  %arrayidx23 = getelementptr inbounds i16, ptr %y, i64 %idxprom6
+  %13 = load i16, ptr %arrayidx23, align 2
+  %arrayidx26 = getelementptr inbounds i16, ptr %y, i64 %idxprom9
+  %14 = load i16, ptr %arrayidx26, align 2
+  %arrayidx29 = getelementptr inbounds i16, ptr %y, i64 %idxprom12
+  %15 = load i16, ptr %arrayidx29, align 2
   %mul = mul i16 %8, %0
   %mul36 = mul i16 %9, %1
   %add37 = add i16 %mul36, %mul
@@ -413,39 +409,31 @@ entry:
   ret i16 %add73
 }
 
-define i32 @reduce_blockstrided4x4(i8* nocapture noundef readonly %p1, i32 noundef %off1, i8* nocapture noundef readonly %p2, i32 noundef %off2) {
+define i32 @reduce_blockstrided4x4(ptr nocapture noundef readonly %p1, i32 noundef %off1, ptr nocapture noundef readonly %p2, i32 noundef %off2) {
 ; CHECK-LABEL: @reduce_blockstrided4x4(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[OFF1:%.*]] to i64
 ; CHECK-NEXT:    [[IDX_EXT63:%.*]] = sext i32 [[OFF2:%.*]] to i64
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[P1:%.*]], i64 4
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[P2:%.*]], i64 4
-; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[P1]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[ADD_PTR64:%.*]] = getelementptr inbounds i8, i8* [[P2]], i64 [[IDX_EXT63]]
-; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR]], i64 4
-; CHECK-NEXT:    [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64]], i64 4
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[P1]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 1
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[P2]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* [[TMP2]], align 1
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[ARRAYIDX3]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP4]], align 1
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[ARRAYIDX5]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i8>, <4 x i8>* [[TMP6]], align 1
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8* [[ADD_PTR]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP9:%.*]] = load <4 x i8>, <4 x i8>* [[TMP8]], align 1
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8* [[ADD_PTR64]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP11:%.*]] = load <4 x i8>, <4 x i8>* [[TMP10]], align 1
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[P1:%.*]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[P2:%.*]], i64 4
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[ADD_PTR64:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IDX_EXT63]]
+; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[P1]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr [[P2]], align 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1
+; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1
+; CHECK-NEXT:    [[TMP9:%.*]] = load <4 x i8>, ptr [[ADD_PTR]], align 1
+; CHECK-NEXT:    [[TMP11:%.*]] = load <4 x i8>, ptr [[ADD_PTR64]], align 1
 ; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> [[TMP13]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <16 x i8> [[TMP14]], <16 x i8> [[TMP15]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
 ; CHECK-NEXT:    [[TMP17:%.*]] = zext <16 x i8> [[TMP16]] to <16 x i32>
-; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i8* [[ARRAYIDX3_1]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP19:%.*]] = load <4 x i8>, <4 x i8>* [[TMP18]], align 1
-; CHECK-NEXT:    [[TMP20:%.*]] = bitcast i8* [[ARRAYIDX5_1]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP21:%.*]] = load <4 x i8>, <4 x i8>* [[TMP20]], align 1
+; CHECK-NEXT:    [[TMP19:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1
+; CHECK-NEXT:    [[TMP21:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1
 ; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <4 x i8> [[TMP19]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> [[TMP23]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -460,195 +448,195 @@ entry:
   %idx.ext = sext i32 %off1 to i64
   %idx.ext63 = sext i32 %off2 to i64
 
-  %0 = load i8, i8* %p1, align 1
+  %0 = load i8, ptr %p1, align 1
   %conv = zext i8 %0 to i32
-  %1 = load i8, i8* %p2, align 1
+  %1 = load i8, ptr %p2, align 1
   %conv2 = zext i8 %1 to i32
-  %arrayidx3 = getelementptr inbounds i8, i8* %p1, i64 4
-  %2 = load i8, i8* %arrayidx3, align 1
+  %arrayidx3 = getelementptr inbounds i8, ptr %p1, i64 4
+  %2 = load i8, ptr %arrayidx3, align 1
   %conv4 = zext i8 %2 to i32
-  %arrayidx5 = getelementptr inbounds i8, i8* %p2, i64 4
-  %3 = load i8, i8* %arrayidx5, align 1
+  %arrayidx5 = getelementptr inbounds i8, ptr %p2, i64 4
+  %3 = load i8, ptr %arrayidx5, align 1
   %conv6 = zext i8 %3 to i32
-  %arrayidx8 = getelementptr inbounds i8, i8* %p1, i64 1
-  %4 = load i8, i8* %arrayidx8, align 1
+  %arrayidx8 = getelementptr inbounds i8, ptr %p1, i64 1
+  %4 = load i8, ptr %arrayidx8, align 1
   %conv9 = zext i8 %4 to i32
-  %arrayidx10 = getelementptr inbounds i8, i8* %p2, i64 1
-  %5 = load i8, i8* %arrayidx10, align 1
+  %arrayidx10 = getelementptr inbounds i8, ptr %p2, i64 1
+  %5 = load i8, ptr %arrayidx10, align 1
   %conv11 = zext i8 %5 to i32
-  %arrayidx13 = getelementptr inbounds i8, i8* %p1, i64 5
-  %6 = load i8, i8* %arrayidx13, align 1
+  %arrayidx13 = getelementptr inbounds i8, ptr %p1, i64 5
+  %6 = load i8, ptr %arrayidx13, align 1
   %conv14 = zext i8 %6 to i32
-  %arrayidx15 = getelementptr inbounds i8, i8* %p2, i64 5
-  %7 = load i8, i8* %arrayidx15, align 1
+  %arrayidx15 = getelementptr inbounds i8, ptr %p2, i64 5
+  %7 = load i8, ptr %arrayidx15, align 1
   %conv16 = zext i8 %7 to i32
-  %arrayidx20 = getelementptr inbounds i8, i8* %p1, i64 2
-  %8 = load i8, i8* %arrayidx20, align 1
+  %arrayidx20 = getelementptr inbounds i8, ptr %p1, i64 2
+  %8 = load i8, ptr %arrayidx20, align 1
   %conv21 = zext i8 %8 to i32
-  %arrayidx22 = getelementptr inbounds i8, i8* %p2, i64 2
-  %9 = load i8, i8* %arrayidx22, align 1
+  %arrayidx22 = getelementptr inbounds i8, ptr %p2, i64 2
+  %9 = load i8, ptr %arrayidx22, align 1
   %conv23 = zext i8 %9 to i32
-  %arrayidx25 = getelementptr inbounds i8, i8* %p1, i64 6
-  %10 = load i8, i8* %arrayidx25, align 1
+  %arrayidx25 = getelementptr inbounds i8, ptr %p1, i64 6
+  %10 = load i8, ptr %arrayidx25, align 1
   %conv26 = zext i8 %10 to i32
-  %arrayidx27 = getelementptr inbounds i8, i8* %p2, i64 6
-  %11 = load i8, i8* %arrayidx27, align 1
+  %arrayidx27 = getelementptr inbounds i8, ptr %p2, i64 6
+  %11 = load i8, ptr %arrayidx27, align 1
   %conv28 = zext i8 %11 to i32
-  %arrayidx32 = getelementptr inbounds i8, i8* %p1, i64 3
-  %12 = load i8, i8* %arrayidx32, align 1
+  %arrayidx32 = getelementptr inbounds i8, ptr %p1, i64 3
+  %12 = load i8, ptr %arrayidx32, align 1
   %conv33 = zext i8 %12 to i32
-  %arrayidx34 = getelementptr inbounds i8, i8* %p2, i64 3
-  %13 = load i8, i8* %arrayidx34, align 1
+  %arrayidx34 = getelementptr inbounds i8, ptr %p2, i64 3
+  %13 = load i8, ptr %arrayidx34, align 1
   %conv35 = zext i8 %13 to i32
-  %arrayidx37 = getelementptr inbounds i8, i8* %p1, i64 7
-  %14 = load i8, i8* %arrayidx37, align 1
+  %arrayidx37 = getelementptr inbounds i8, ptr %p1, i64 7
+  %14 = load i8, ptr %arrayidx37, align 1
   %conv38 = zext i8 %14 to i32
-  %arrayidx39 = getelementptr inbounds i8, i8* %p2, i64 7
-  %15 = load i8, i8* %arrayidx39, align 1
+  %arrayidx39 = getelementptr inbounds i8, ptr %p2, i64 7
+  %15 = load i8, ptr %arrayidx39, align 1
   %conv40 = zext i8 %15 to i32
-  %add.ptr = getelementptr inbounds i8, i8* %p1, i64 %idx.ext
-  %16 = load i8, i8* %add.ptr, align 1
+  %add.ptr = getelementptr inbounds i8, ptr %p1, i64 %idx.ext
+  %16 = load i8, ptr %add.ptr, align 1
   %conv.1 = zext i8 %16 to i32
-  %add.ptr64 = getelementptr inbounds i8, i8* %p2, i64 %idx.ext63
-  %17 = load i8, i8* %add.ptr64, align 1
+  %add.ptr64 = getelementptr inbounds i8, ptr %p2, i64 %idx.ext63
+  %17 = load i8, ptr %add.ptr64, align 1
   %conv2.1 = zext i8 %17 to i32
-  %arrayidx3.1 = getelementptr inbounds i8, i8* %add.ptr, i64 4
-  %18 = load i8, i8* %arrayidx3.1, align 1
+  %arrayidx3.1 = getelementptr inbounds i8, ptr %add.ptr, i64 4
+  %18 = load i8, ptr %arrayidx3.1, align 1
   %conv4.1 = zext i8 %18 to i32
-  %arrayidx5.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 4
-  %19 = load i8, i8* %arrayidx5.1, align 1
+  %arrayidx5.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 4
+  %19 = load i8, ptr %arrayidx5.1, align 1
   %conv6.1 = zext i8 %19 to i32
-  %arrayidx8.1 = getelementptr inbounds i8, i8* %add.ptr, i64 1
-  %20 = load i8, i8* %arrayidx8.1, align 1
+  %arrayidx8.1 = getelementptr inbounds i8, ptr %add.ptr, i64 1
+  %20 = load i8, ptr %arrayidx8.1, align 1
   %conv9.1 = zext i8 %20 to i32
-  %arrayidx10.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 1
-  %21 = load i8, i8* %arrayidx10.1, align 1
+  %arrayidx10.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 1
+  %21 = load i8, ptr %arrayidx10.1, align 1
   %conv11.1 = zext i8 %21 to i32
-  %arrayidx13.1 = getelementptr inbounds i8, i8* %add.ptr, i64 5
-  %22 = load i8, i8* %arrayidx13.1, align 1
+  %arrayidx13.1 = getelementptr inbounds i8, ptr %add.ptr, i64 5
+  %22 = load i8, ptr %arrayidx13.1, align 1
   %conv14.1 = zext i8 %22 to i32
-  %arrayidx15.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 5
-  %23 = load i8, i8* %arrayidx15.1, align 1
+  %arrayidx15.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 5
+  %23 = load i8, ptr %arrayidx15.1, align 1
   %conv16.1 = zext i8 %23 to i32
-  %arrayidx20.1 = getelementptr inbounds i8, i8* %add.ptr, i64 2
-  %24 = load i8, i8* %arrayidx20.1, align 1
+  %arrayidx20.1 = getelementptr inbounds i8, ptr %add.ptr, i64 2
+  %24 = load i8, ptr %arrayidx20.1, align 1
   %conv21.1 = zext i8 %24 to i32
-  %arrayidx22.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 2
-  %25 = load i8, i8* %arrayidx22.1, align 1
+  %arrayidx22.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 2
+  %25 = load i8, ptr %arrayidx22.1, align 1
   %conv23.1 = zext i8 %25 to i32
-  %arrayidx25.1 = getelementptr inbounds i8, i8* %add.ptr, i64 6
-  %26 = load i8, i8* %arrayidx25.1, align 1
+  %arrayidx25.1 = getelementptr inbounds i8, ptr %add.ptr, i64 6
+  %26 = load i8, ptr %arrayidx25.1, align 1
   %conv26.1 = zext i8 %26 to i32
-  %arrayidx27.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 6
-  %27 = load i8, i8* %arrayidx27.1, align 1
+  %arrayidx27.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 6
+  %27 = load i8, ptr %arrayidx27.1, align 1
   %conv28.1 = zext i8 %27 to i32
-  %arrayidx32.1 = getelementptr inbounds i8, i8* %add.ptr, i64 3
-  %28 = load i8, i8* %arrayidx32.1, align 1
+  %arrayidx32.1 = getelementptr inbounds i8, ptr %add.ptr, i64 3
+  %28 = load i8, ptr %arrayidx32.1, align 1
   %conv33.1 = zext i8 %28 to i32
-  %arrayidx34.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 3
-  %29 = load i8, i8* %arrayidx34.1, align 1
+  %arrayidx34.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 3
+  %29 = load i8, ptr %arrayidx34.1, align 1
   %conv35.1 = zext i8 %29 to i32
-  %arrayidx37.1 = getelementptr inbounds i8, i8* %add.ptr, i64 7
-  %30 = load i8, i8* %arrayidx37.1, align 1
+  %arrayidx37.1 = getelementptr inbounds i8, ptr %add.ptr, i64 7
+  %30 = load i8, ptr %arrayidx37.1, align 1
   %conv38.1 = zext i8 %30 to i32
-  %arrayidx39.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 7
-  %31 = load i8, i8* %arrayidx39.1, align 1
+  %arrayidx39.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 7
+  %31 = load i8, ptr %arrayidx39.1, align 1
   %conv40.1 = zext i8 %31 to i32
-  %add.ptr.1 = getelementptr inbounds i8, i8* %add.ptr, i64 %idx.ext
-  %32 = load i8, i8* %add.ptr.1, align 1
+  %add.ptr.1 = getelementptr inbounds i8, ptr %add.ptr, i64 %idx.ext
+  %32 = load i8, ptr %add.ptr.1, align 1
   %conv.2 = zext i8 %32 to i32
-  %add.ptr64.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 %idx.ext63
-  %33 = load i8, i8* %add.ptr64.1, align 1
+  %add.ptr64.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 %idx.ext63
+  %33 = load i8, ptr %add.ptr64.1, align 1
   %conv2.2 = zext i8 %33 to i32
-  %arrayidx3.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 4
-  %34 = load i8, i8* %arrayidx3.2, align 1
+  %arrayidx3.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 4
+  %34 = load i8, ptr %arrayidx3.2, align 1
   %conv4.2 = zext i8 %34 to i32
-  %arrayidx5.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 4
-  %35 = load i8, i8* %arrayidx5.2, align 1
+  %arrayidx5.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 4
+  %35 = load i8, ptr %arrayidx5.2, align 1
   %conv6.2 = zext i8 %35 to i32
-  %arrayidx8.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 1
-  %36 = load i8, i8* %arrayidx8.2, align 1
+  %arrayidx8.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 1
+  %36 = load i8, ptr %arrayidx8.2, align 1
   %conv9.2 = zext i8 %36 to i32
-  %arrayidx10.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 1
-  %37 = load i8, i8* %arrayidx10.2, align 1
+  %arrayidx10.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 1
+  %37 = load i8, ptr %arrayidx10.2, align 1
   %conv11.2 = zext i8 %37 to i32
-  %arrayidx13.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 5
-  %38 = load i8, i8* %arrayidx13.2, align 1
+  %arrayidx13.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 5
+  %38 = load i8, ptr %arrayidx13.2, align 1
   %conv14.2 = zext i8 %38 to i32
-  %arrayidx15.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 5
-  %39 = load i8, i8* %arrayidx15.2, align 1
+  %arrayidx15.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 5
+  %39 = load i8, ptr %arrayidx15.2, align 1
   %conv16.2 = zext i8 %39 to i32
-  %arrayidx20.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 2
-  %40 = load i8, i8* %arrayidx20.2, align 1
+  %arrayidx20.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 2
+  %40 = load i8, ptr %arrayidx20.2, align 1
   %conv21.2 = zext i8 %40 to i32
-  %arrayidx22.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 2
-  %41 = load i8, i8* %arrayidx22.2, align 1
+  %arrayidx22.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 2
+  %41 = load i8, ptr %arrayidx22.2, align 1
   %conv23.2 = zext i8 %41 to i32
-  %arrayidx25.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 6
-  %42 = load i8, i8* %arrayidx25.2, align 1
+  %arrayidx25.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 6
+  %42 = load i8, ptr %arrayidx25.2, align 1
   %conv26.2 = zext i8 %42 to i32
-  %arrayidx27.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 6
-  %43 = load i8, i8* %arrayidx27.2, align 1
+  %arrayidx27.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 6
+  %43 = load i8, ptr %arrayidx27.2, align 1
   %conv28.2 = zext i8 %43 to i32
-  %arrayidx32.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 3
-  %44 = load i8, i8* %arrayidx32.2, align 1
+  %arrayidx32.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 3
+  %44 = load i8, ptr %arrayidx32.2, align 1
   %conv33.2 = zext i8 %44 to i32
-  %arrayidx34.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 3
-  %45 = load i8, i8* %arrayidx34.2, align 1
+  %arrayidx34.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 3
+  %45 = load i8, ptr %arrayidx34.2, align 1
   %conv35.2 = zext i8 %45 to i32
-  %arrayidx37.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 7
-  %46 = load i8, i8* %arrayidx37.2, align 1
+  %arrayidx37.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 7
+  %46 = load i8, ptr %arrayidx37.2, align 1
   %conv38.2 = zext i8 %46 to i32
-  %arrayidx39.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 7
-  %47 = load i8, i8* %arrayidx39.2, align 1
+  %arrayidx39.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 7
+  %47 = load i8, ptr %arrayidx39.2, align 1
   %conv40.2 = zext i8 %47 to i32
-  %add.ptr.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 %idx.ext
-  %48 = load i8, i8* %add.ptr.2, align 1
+  %add.ptr.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 %idx.ext
+  %48 = load i8, ptr %add.ptr.2, align 1
   %conv.3 = zext i8 %48 to i32
-  %add.ptr64.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 %idx.ext63
-  %49 = load i8, i8* %add.ptr64.2, align 1
+  %add.ptr64.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 %idx.ext63
+  %49 = load i8, ptr %add.ptr64.2, align 1
   %conv2.3 = zext i8 %49 to i32
-  %arrayidx3.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 4
-  %50 = load i8, i8* %arrayidx3.3, align 1
+  %arrayidx3.3 = getelementptr inbounds i8, ptr %add.ptr.2, i64 4
+  %50 = load i8, ptr %arrayidx3.3, align 1
   %conv4.3 = zext i8 %50 to i32
-  %arrayidx5.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 4
-  %51 = load i8, i8* %arrayidx5.3, align 1
+  %arrayidx5.3 = getelementptr inbounds i8, ptr %add.ptr64.2, i64 4
+  %51 = load i8, ptr %arrayidx5.3, align 1
   %conv6.3 = zext i8 %51 to i32
-  %arrayidx8.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 1
-  %52 = load i8, i8* %arrayidx8.3, align 1
+  %arrayidx8.3 = getelementptr inbounds i8, ptr %add.ptr.2, i64 1
+  %52 = load i8, ptr %arrayidx8.3, align 1
   %conv9.3 = zext i8 %52 to i32
-  %arrayidx10.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 1
-  %53 = load i8, i8* %arrayidx10.3, align 1
+  %arrayidx10.3 = getelementptr inbounds i8, ptr %add.ptr64.2, i64 1
+  %53 = load i8, ptr %arrayidx10.3, align 1
   %conv11.3 = zext i8 %53 to i32
-  %arrayidx13.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 5
-  %54 = load i8, i8* %arrayidx13.3, align 1
+  %arrayidx13.3 = getelementptr inbounds i8, ptr %add.ptr.2, i64 5
+  %54 = load i8, ptr %arrayidx13.3, align 1
   %conv14.3 = zext i8 %54 to i32
-  %arrayidx15.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 5
-  %55 = load i8, i8* %arrayidx15.3, align 1
+  %arrayidx15.3 = getelementptr inbounds i8, ptr %add.ptr64.2, i64 5
+  %55 = load i8, ptr %arrayidx15.3, align 1
   %conv16.3 = zext i8 %55 to i32
-  %arrayidx20.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 2
-  %56 = load i8, i8* %arrayidx20.3, align 1
+  %arrayidx20.3 = getelementptr inbounds i8, ptr %add.ptr.2, i64 2
+  %56 = load i8, ptr %arrayidx20.3, align 1
   %conv21.3 = zext i8 %56 to i32
-  %arrayidx22.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 2
-  %57 = load i8, i8* %arrayidx22.3, align 1
+  %arrayidx22.3 = getelementptr inbounds i8, ptr %add.ptr64.2, i64 2
+  %57 = load i8, ptr %arrayidx22.3, align 1
   %conv23.3 = zext i8 %57 to i32
-  %arrayidx25.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 6
-  %58 = load i8, i8* %arrayidx25.3, align 1
+  %arrayidx25.3 = getelementptr inbounds i8, ptr %add.ptr.2, i64 6
+  %58 = load i8, ptr %arrayidx25.3, align 1
   %conv26.3 = zext i8 %58 to i32
-  %arrayidx27.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 6
-  %59 = load i8, i8* %arrayidx27.3, align 1
+  %arrayidx27.3 = getelementptr inbounds i8, ptr %add.ptr64.2, i64 6
+  %59 = load i8, ptr %arrayidx27.3, align 1
   %conv28.3 = zext i8 %59 to i32
-  %arrayidx32.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 3
-  %60 = load i8, i8* %arrayidx32.3, align 1
+  %arrayidx32.3 = getelementptr inbounds i8, ptr %add.ptr.2, i64 3
+  %60 = load i8, ptr %arrayidx32.3, align 1
   %conv33.3 = zext i8 %60 to i32
-  %arrayidx34.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 3
-  %61 = load i8, i8* %arrayidx34.3, align 1
+  %arrayidx34.3 = getelementptr inbounds i8, ptr %add.ptr64.2, i64 3
+  %61 = load i8, ptr %arrayidx34.3, align 1
   %conv35.3 = zext i8 %61 to i32
-  %arrayidx37.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 7
-  %62 = load i8, i8* %arrayidx37.3, align 1
+  %arrayidx37.3 = getelementptr inbounds i8, ptr %add.ptr.2, i64 7
+  %62 = load i8, ptr %arrayidx37.3, align 1
   %conv38.3 = zext i8 %62 to i32
-  %arrayidx39.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 7
-  %63 = load i8, i8* %arrayidx39.3, align 1
+  %arrayidx39.3 = getelementptr inbounds i8, ptr %add.ptr64.2, i64 7
+  %63 = load i8, ptr %arrayidx39.3, align 1
   %conv40.3 = zext i8 %63 to i32
 
   %m1 = mul i32 %conv, %conv4
@@ -686,228 +674,213 @@ entry:
   ret i32 %a16
 }
 
-define void @store_blockstrided3(i32* nocapture noundef readonly %x, i32* nocapture noundef readonly %y, i32* nocapture noundef writeonly %z, i32 noundef %stride) {
+define void @store_blockstrided3(ptr nocapture noundef readonly %x, ptr nocapture noundef readonly %y, ptr nocapture noundef writeonly %z, i32 noundef %stride) {
 ; CHECK-LABEL: @store_blockstrided3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 2
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i64 2
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[ADD4:%.*]] = add nsw i32 [[STRIDE:%.*]], 1
 ; CHECK-NEXT:    [[IDXPROM5:%.*]] = sext i32 [[ADD4]] to i64
-; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM5]]
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM5]]
 ; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i32 [[STRIDE]], 1
 ; CHECK-NEXT:    [[IDXPROM11:%.*]] = sext i32 [[MUL]] to i64
-; CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM11]]
+; CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM11]]
 ; CHECK-NEXT:    [[ADD18:%.*]] = add nsw i32 [[MUL]], 2
 ; CHECK-NEXT:    [[IDXPROM19:%.*]] = sext i32 [[ADD18]] to i64
-; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM19]]
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4
+; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM19]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4
 ; CHECK-NEXT:    [[MUL21:%.*]] = mul nsw i32 [[STRIDE]], 3
 ; CHECK-NEXT:    [[IDXPROM23:%.*]] = sext i32 [[MUL21]] to i64
-; CHECK-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM23]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX24]], align 4
+; CHECK-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM23]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX24]], align 4
 ; CHECK-NEXT:    [[ADD26:%.*]] = add nsw i32 [[MUL21]], 1
 ; CHECK-NEXT:    [[IDXPROM27:%.*]] = sext i32 [[ADD26]] to i64
-; CHECK-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM27]]
-; CHECK-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, i32* [[Y:%.*]], i64 2
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX35]], align 4
-; CHECK-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds i32, i32* [[Y]], i64 [[IDXPROM5]]
-; CHECK-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds i32, i32* [[Y]], i64 [[IDXPROM11]]
-; CHECK-NEXT:    [[ARRAYIDX56:%.*]] = getelementptr inbounds i32, i32* [[Y]], i64 [[IDXPROM19]]
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX56]], align 4
-; CHECK-NEXT:    [[ARRAYIDX60:%.*]] = getelementptr inbounds i32, i32* [[Y]], i64 [[IDXPROM23]]
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX60]], align 4
-; CHECK-NEXT:    [[ARRAYIDX64:%.*]] = getelementptr inbounds i32, i32* [[Y]], i64 [[IDXPROM27]]
-; CHECK-NEXT:    [[ARRAYIDX72:%.*]] = getelementptr inbounds i32, i32* [[Z:%.*]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM27]]
+; CHECK-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i64 2
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX35]], align 4
+; CHECK-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM5]]
+; CHECK-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM11]]
+; CHECK-NEXT:    [[ARRAYIDX56:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM19]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX56]], align 4
+; CHECK-NEXT:    [[ARRAYIDX60:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM23]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX60]], align 4
+; CHECK-NEXT:    [[ARRAYIDX64:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM27]]
+; CHECK-NEXT:    [[ARRAYIDX72:%.*]] = getelementptr inbounds i32, ptr [[Z:%.*]], i64 1
 ; CHECK-NEXT:    [[MUL73:%.*]] = mul nsw i32 [[TMP3]], [[TMP0]]
-; CHECK-NEXT:    [[ARRAYIDX76:%.*]] = getelementptr inbounds i32, i32* [[Z]], i64 6
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[X]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[TMP6]], align 4
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32* [[ARRAYIDX6]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[TMP8]], align 4
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i32* [[Y]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP11:%.*]] = load <2 x i32>, <2 x i32>* [[TMP10]], align 4
-; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i32* [[ARRAYIDX41]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP13:%.*]] = load <2 x i32>, <2 x i32>* [[TMP12]], align 4
+; CHECK-NEXT:    [[ARRAYIDX76:%.*]] = getelementptr inbounds i32, ptr [[Z]], i64 6
+; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x i32>, ptr [[X]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = load <2 x i32>, ptr [[Y]], align 4
+; CHECK-NEXT:    [[TMP13:%.*]] = load <2 x i32>, ptr [[ARRAYIDX41]], align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = mul nsw <2 x i32> [[TMP11]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = mul nsw <2 x i32> [[TMP13]], [[TMP9]]
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i32* [[ARRAYIDX72]] to <4 x i32>*
-; CHECK-NEXT:    [[ARRAYIDX84:%.*]] = getelementptr inbounds i32, i32* [[Z]], i64 7
+; CHECK-NEXT:    [[ARRAYIDX84:%.*]] = getelementptr inbounds i32, ptr [[Z]], i64 7
 ; CHECK-NEXT:    [[MUL85:%.*]] = mul nsw i32 [[TMP4]], [[TMP1]]
 ; CHECK-NEXT:    [[MUL87:%.*]] = mul nsw i32 [[TMP5]], [[TMP2]]
-; CHECK-NEXT:    [[ARRAYIDX88:%.*]] = getelementptr inbounds i32, i32* [[Z]], i64 11
-; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i32* [[ARRAYIDX12]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP18:%.*]] = load <2 x i32>, <2 x i32>* [[TMP17]], align 4
-; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i32* [[ARRAYIDX28]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP20:%.*]] = load <2 x i32>, <2 x i32>* [[TMP19]], align 4
-; CHECK-NEXT:    [[TMP21:%.*]] = bitcast i32* [[ARRAYIDX48]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP22:%.*]] = load <2 x i32>, <2 x i32>* [[TMP21]], align 4
-; CHECK-NEXT:    [[TMP23:%.*]] = bitcast i32* [[ARRAYIDX64]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP24:%.*]] = load <2 x i32>, <2 x i32>* [[TMP23]], align 4
-; CHECK-NEXT:    store i32 [[MUL73]], i32* [[Z]], align 4
-; CHECK-NEXT:    store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP16]], align 4
-; CHECK-NEXT:    store i32 [[MUL85]], i32* [[ARRAYIDX76]], align 4
-; CHECK-NEXT:    store i32 [[MUL87]], i32* [[ARRAYIDX88]], align 4
+; CHECK-NEXT:    [[ARRAYIDX88:%.*]] = getelementptr inbounds i32, ptr [[Z]], i64 11
+; CHECK-NEXT:    [[TMP18:%.*]] = load <2 x i32>, ptr [[ARRAYIDX12]], align 4
+; CHECK-NEXT:    [[TMP20:%.*]] = load <2 x i32>, ptr [[ARRAYIDX28]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = load <2 x i32>, ptr [[ARRAYIDX48]], align 4
+; CHECK-NEXT:    [[TMP24:%.*]] = load <2 x i32>, ptr [[ARRAYIDX64]], align 4
+; CHECK-NEXT:    store i32 [[MUL73]], ptr [[Z]], align 4
+; CHECK-NEXT:    store <4 x i32> [[SHUFFLE]], ptr [[ARRAYIDX72]], align 4
+; CHECK-NEXT:    store i32 [[MUL85]], ptr [[ARRAYIDX76]], align 4
+; CHECK-NEXT:    store i32 [[MUL87]], ptr [[ARRAYIDX88]], align 4
 ; CHECK-NEXT:    [[TMP25:%.*]] = mul nsw <2 x i32> [[TMP22]], [[TMP18]]
 ; CHECK-NEXT:    [[TMP26:%.*]] = mul nsw <2 x i32> [[TMP24]], [[TMP20]]
 ; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP25]], <2 x i32> [[TMP26]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT:    [[TMP27:%.*]] = bitcast i32* [[ARRAYIDX84]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[SHUFFLE1]], <4 x i32>* [[TMP27]], align 4
+; CHECK-NEXT:    store <4 x i32> [[SHUFFLE1]], ptr [[ARRAYIDX84]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load i32, i32* %x, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32* %x, i64 1
-  %1 = load i32, i32* %arrayidx1, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %x, i64 2
-  %2 = load i32, i32* %arrayidx2, align 4
+  %0 = load i32, ptr %x, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %x, i64 1
+  %1 = load i32, ptr %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %x, i64 2
+  %2 = load i32, ptr %arrayidx2, align 4
   %add4 = add nsw i32 %stride, 1
   %idxprom5 = sext i32 %add4 to i64
-  %arrayidx6 = getelementptr inbounds i32, i32* %x, i64 %idxprom5
-  %3 = load i32, i32* %arrayidx6, align 4
+  %arrayidx6 = getelementptr inbounds i32, ptr %x, i64 %idxprom5
+  %3 = load i32, ptr %arrayidx6, align 4
   %add7 = add nsw i32 %stride, 2
   %idxprom8 = sext i32 %add7 to i64
-  %arrayidx9 = getelementptr inbounds i32, i32* %x, i64 %idxprom8
-  %4 = load i32, i32* %arrayidx9, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %x, i64 %idxprom8
+  %4 = load i32, ptr %arrayidx9, align 4
   %mul = shl nsw i32 %stride, 1
   %idxprom11 = sext i32 %mul to i64
-  %arrayidx12 = getelementptr inbounds i32, i32* %x, i64 %idxprom11
-  %5 = load i32, i32* %arrayidx12, align 4
+  %arrayidx12 = getelementptr inbounds i32, ptr %x, i64 %idxprom11
+  %5 = load i32, ptr %arrayidx12, align 4
   %add14 = or i32 %mul, 1
   %idxprom15 = sext i32 %add14 to i64
-  %arrayidx16 = getelementptr inbounds i32, i32* %x, i64 %idxprom15
-  %6 = load i32, i32* %arrayidx16, align 4
+  %arrayidx16 = getelementptr inbounds i32, ptr %x, i64 %idxprom15
+  %6 = load i32, ptr %arrayidx16, align 4
   %add18 = add nsw i32 %mul, 2
   %idxprom19 = sext i32 %add18 to i64
-  %arrayidx20 = getelementptr inbounds i32, i32* %x, i64 %idxprom19
-  %7 = load i32, i32* %arrayidx20, align 4
+  %arrayidx20 = getelementptr inbounds i32, ptr %x, i64 %idxprom19
+  %7 = load i32, ptr %arrayidx20, align 4
   %mul21 = mul nsw i32 %stride, 3
   %idxprom23 = sext i32 %mul21 to i64
-  %arrayidx24 = getelementptr inbounds i32, i32* %x, i64 %idxprom23
-  %8 = load i32, i32* %arrayidx24, align 4
+  %arrayidx24 = getelementptr inbounds i32, ptr %x, i64 %idxprom23
+  %8 = load i32, ptr %arrayidx24, align 4
   %add26 = add nsw i32 %mul21, 1
   %idxprom27 = sext i32 %add26 to i64
-  %arrayidx28 = getelementptr inbounds i32, i32* %x, i64 %idxprom27
-  %9 = load i32, i32* %arrayidx28, align 4
+  %arrayidx28 = getelementptr inbounds i32, ptr %x, i64 %idxprom27
+  %9 = load i32, ptr %arrayidx28, align 4
   %add30 = add nsw i32 %mul21, 2
   %idxprom31 = sext i32 %add30 to i64
-  %arrayidx32 = getelementptr inbounds i32, i32* %x, i64 %idxprom31
-  %10 = load i32, i32* %arrayidx32, align 4
-  %11 = load i32, i32* %y, align 4
-  %arrayidx34 = getelementptr inbounds i32, i32* %y, i64 1
-  %12 = load i32, i32* %arrayidx34, align 4
-  %arrayidx35 = getelementptr inbounds i32, i32* %y, i64 2
-  %13 = load i32, i32* %arrayidx35, align 4
-  %arrayidx41 = getelementptr inbounds i32, i32* %y, i64 %idxprom5
-  %14 = load i32, i32* %arrayidx41, align 4
-  %arrayidx44 = getelementptr inbounds i32, i32* %y, i64 %idxprom8
-  %15 = load i32, i32* %arrayidx44, align 4
-  %arrayidx48 = getelementptr inbounds i32, i32* %y, i64 %idxprom11
-  %16 = load i32, i32* %arrayidx48, align 4
-  %arrayidx52 = getelementptr inbounds i32, i32* %y, i64 %idxprom15
-  %17 = load i32, i32* %arrayidx52, align 4
-  %arrayidx56 = getelementptr inbounds i32, i32* %y, i64 %idxprom19
-  %18 = load i32, i32* %arrayidx56, align 4
-  %arrayidx60 = getelementptr inbounds i32, i32* %y, i64 %idxprom23
-  %19 = load i32, i32* %arrayidx60, align 4
-  %arrayidx64 = getelementptr inbounds i32, i32* %y, i64 %idxprom27
-  %20 = load i32, i32* %arrayidx64, align 4
-  %arrayidx68 = getelementptr inbounds i32, i32* %y, i64 %idxprom31
-  %21 = load i32, i32* %arrayidx68, align 4
+  %arrayidx32 = getelementptr inbounds i32, ptr %x, i64 %idxprom31
+  %10 = load i32, ptr %arrayidx32, align 4
+  %11 = load i32, ptr %y, align 4
+  %arrayidx34 = getelementptr inbounds i32, ptr %y, i64 1
+  %12 = load i32, ptr %arrayidx34, align 4
+  %arrayidx35 = getelementptr inbounds i32, ptr %y, i64 2
+  %13 = load i32, ptr %arrayidx35, align 4
+  %arrayidx41 = getelementptr inbounds i32, ptr %y, i64 %idxprom5
+  %14 = load i32, ptr %arrayidx41, align 4
+  %arrayidx44 = getelementptr inbounds i32, ptr %y, i64 %idxprom8
+  %15 = load i32, ptr %arrayidx44, align 4
+  %arrayidx48 = getelementptr inbounds i32, ptr %y, i64 %idxprom11
+  %16 = load i32, ptr %arrayidx48, align 4
+  %arrayidx52 = getelementptr inbounds i32, ptr %y, i64 %idxprom15
+  %17 = load i32, ptr %arrayidx52, align 4
+  %arrayidx56 = getelementptr inbounds i32, ptr %y, i64 %idxprom19
+  %18 = load i32, ptr %arrayidx56, align 4
+  %arrayidx60 = getelementptr inbounds i32, ptr %y, i64 %idxprom23
+  %19 = load i32, ptr %arrayidx60, align 4
+  %arrayidx64 = getelementptr inbounds i32, ptr %y, i64 %idxprom27
+  %20 = load i32, ptr %arrayidx64, align 4
+  %arrayidx68 = getelementptr inbounds i32, ptr %y, i64 %idxprom31
+  %21 = load i32, ptr %arrayidx68, align 4
   %mul69 = mul nsw i32 %11, %0
-  %arrayidx70 = getelementptr inbounds i32, i32* %z, i64 2
-  store i32 %mul69, i32* %arrayidx70, align 4
+  %arrayidx70 = getelementptr inbounds i32, ptr %z, i64 2
+  store i32 %mul69, ptr %arrayidx70, align 4
   %mul71 = mul nsw i32 %12, %1
-  %arrayidx72 = getelementptr inbounds i32, i32* %z, i64 1
-  store i32 %mul71, i32* %arrayidx72, align 4
+  %arrayidx72 = getelementptr inbounds i32, ptr %z, i64 1
+  store i32 %mul71, ptr %arrayidx72, align 4
   %mul73 = mul nsw i32 %13, %2
-  store i32 %mul73, i32* %z, align 4
-  %arrayidx76 = getelementptr inbounds i32, i32* %z, i64 6
+  store i32 %mul73, ptr %z, align 4
+  %arrayidx76 = getelementptr inbounds i32, ptr %z, i64 6
   %mul77 = mul nsw i32 %14, %3
-  %arrayidx78 = getelementptr inbounds i32, i32* %z, i64 4
-  store i32 %mul77, i32* %arrayidx78, align 4
+  %arrayidx78 = getelementptr inbounds i32, ptr %z, i64 4
+  store i32 %mul77, ptr %arrayidx78, align 4
   %mul79 = mul nsw i32 %15, %4
-  %arrayidx80 = getelementptr inbounds i32, i32* %z, i64 3
-  store i32 %mul79, i32* %arrayidx80, align 4
+  %arrayidx80 = getelementptr inbounds i32, ptr %z, i64 3
+  store i32 %mul79, ptr %arrayidx80, align 4
   %mul81 = mul nsw i32 %16, %5
-  %arrayidx82 = getelementptr inbounds i32, i32* %z, i64 8
-  store i32 %mul81, i32* %arrayidx82, align 4
+  %arrayidx82 = getelementptr inbounds i32, ptr %z, i64 8
+  store i32 %mul81, ptr %arrayidx82, align 4
   %mul83 = mul nsw i32 %17, %6
-  %arrayidx84 = getelementptr inbounds i32, i32* %z, i64 7
-  store i32 %mul83, i32* %arrayidx84, align 4
+  %arrayidx84 = getelementptr inbounds i32, ptr %z, i64 7
+  store i32 %mul83, ptr %arrayidx84, align 4
   %mul85 = mul nsw i32 %18, %7
-  store i32 %mul85, i32* %arrayidx76, align 4
+  store i32 %mul85, ptr %arrayidx76, align 4
   %mul87 = mul nsw i32 %19, %8
-  %arrayidx88 = getelementptr inbounds i32, i32* %z, i64 11
-  store i32 %mul87, i32* %arrayidx88, align 4
+  %arrayidx88 = getelementptr inbounds i32, ptr %z, i64 11
+  store i32 %mul87, ptr %arrayidx88, align 4
   %mul89 = mul nsw i32 %20, %9
-  %arrayidx90 = getelementptr inbounds i32, i32* %z, i64 10
-  store i32 %mul89, i32* %arrayidx90, align 4
+  %arrayidx90 = getelementptr inbounds i32, ptr %z, i64 10
+  store i32 %mul89, ptr %arrayidx90, align 4
   %mul91 = mul nsw i32 %21, %10
-  %arrayidx92 = getelementptr inbounds i32, i32* %z, i64 9
-  store i32 %mul91, i32* %arrayidx92, align 4
+  %arrayidx92 = getelementptr inbounds i32, ptr %z, i64 9
+  store i32 %mul91, ptr %arrayidx92, align 4
   ret void
 }
 
-define void @store_blockstrided4(i16* nocapture noundef readonly %x, i16* nocapture noundef readonly %y, i32 noundef %stride, i16 *%dst0) {
+define void @store_blockstrided4(ptr nocapture noundef readonly %x, ptr nocapture noundef readonly %y, i32 noundef %stride, ptr %dst0) {
 ; CHECK-LABEL: @store_blockstrided4(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[STRIDE:%.*]] to i64
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[X]] to <4 x i16>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 2
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[ARRAYIDX4]] to <4 x i16>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[TMP2]], align 2
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[Y]] to <4 x i16>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[TMP4]], align 2
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i16* [[ARRAYIDX20]] to <4 x i16>*
-; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[TMP6]], align 2
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[X]], align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 2
+; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i16>, ptr [[Y]], align 2
+; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX20]], align 2
 ; CHECK-NEXT:    [[TMP8:%.*]] = mul <4 x i16> [[TMP5]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul <4 x i16> [[TMP7]], [[TMP3]]
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16* [[DST0:%.*]] to <8 x i16>*
-; CHECK-NEXT:    store <8 x i16> [[SHUFFLE]], <8 x i16>* [[TMP10]], align 2
+; CHECK-NEXT:    store <8 x i16> [[SHUFFLE]], ptr [[DST0:%.*]], align 2
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load i16, i16* %x, align 2
-  %arrayidx1 = getelementptr inbounds i16, i16* %x, i64 1
-  %1 = load i16, i16* %arrayidx1, align 2
-  %arrayidx2 = getelementptr inbounds i16, i16* %x, i64 2
-  %2 = load i16, i16* %arrayidx2, align 2
-  %arrayidx3 = getelementptr inbounds i16, i16* %x, i64 3
-  %3 = load i16, i16* %arrayidx3, align 2
+  %0 = load i16, ptr %x, align 2
+  %arrayidx1 = getelementptr inbounds i16, ptr %x, i64 1
+  %1 = load i16, ptr %arrayidx1, align 2
+  %arrayidx2 = getelementptr inbounds i16, ptr %x, i64 2
+  %2 = load i16, ptr %arrayidx2, align 2
+  %arrayidx3 = getelementptr inbounds i16, ptr %x, i64 3
+  %3 = load i16, ptr %arrayidx3, align 2
   %idxprom = sext i32 %stride to i64
-  %arrayidx4 = getelementptr inbounds i16, i16* %x, i64 %idxprom
-  %4 = load i16, i16* %arrayidx4, align 2
+  %arrayidx4 = getelementptr inbounds i16, ptr %x, i64 %idxprom
+  %4 = load i16, ptr %arrayidx4, align 2
   %add5 = add nsw i32 %stride, 1
   %idxprom6 = sext i32 %add5 to i64
-  %arrayidx7 = getelementptr inbounds i16, i16* %x, i64 %idxprom6
-  %5 = load i16, i16* %arrayidx7, align 2
+  %arrayidx7 = getelementptr inbounds i16, ptr %x, i64 %idxprom6
+  %5 = load i16, ptr %arrayidx7, align 2
   %add8 = add nsw i32 %stride, 2
   %idxprom9 = sext i32 %add8 to i64
-  %arrayidx10 = getelementptr inbounds i16, i16* %x, i64 %idxprom9
-  %6 = load i16, i16* %arrayidx10, align 2
+  %arrayidx10 = getelementptr inbounds i16, ptr %x, i64 %idxprom9
+  %6 = load i16, ptr %arrayidx10, align 2
   %add11 = add nsw i32 %stride, 3
   %idxprom12 = sext i32 %add11 to i64
-  %arrayidx13 = getelementptr inbounds i16, i16* %x, i64 %idxprom12
-  %7 = load i16, i16* %arrayidx13, align 2
-  %8 = load i16, i16* %y, align 2
-  %arrayidx15 = getelementptr inbounds i16, i16* %y, i64 1
-  %9 = load i16, i16* %arrayidx15, align 2
-  %arrayidx16 = getelementptr inbounds i16, i16* %y, i64 2
-  %10 = load i16, i16* %arrayidx16, align 2
-  %arrayidx17 = getelementptr inbounds i16, i16* %y, i64 3
-  %11 = load i16, i16* %arrayidx17, align 2
-  %arrayidx20 = getelementptr inbounds i16, i16* %y, i64 %idxprom
-  %12 = load i16, i16* %arrayidx20, align 2
-  %arrayidx23 = getelementptr inbounds i16, i16* %y, i64 %idxprom6
-  %13 = load i16, i16* %arrayidx23, align 2
-  %arrayidx26 = getelementptr inbounds i16, i16* %y, i64 %idxprom9
-  %14 = load i16, i16* %arrayidx26, align 2
-  %arrayidx29 = getelementptr inbounds i16, i16* %y, i64 %idxprom12
-  %15 = load i16, i16* %arrayidx29, align 2
+  %arrayidx13 = getelementptr inbounds i16, ptr %x, i64 %idxprom12
+  %7 = load i16, ptr %arrayidx13, align 2
+  %8 = load i16, ptr %y, align 2
+  %arrayidx15 = getelementptr inbounds i16, ptr %y, i64 1
+  %9 = load i16, ptr %arrayidx15, align 2
+  %arrayidx16 = getelementptr inbounds i16, ptr %y, i64 2
+  %10 = load i16, ptr %arrayidx16, align 2
+  %arrayidx17 = getelementptr inbounds i16, ptr %y, i64 3
+  %11 = load i16, ptr %arrayidx17, align 2
+  %arrayidx20 = getelementptr inbounds i16, ptr %y, i64 %idxprom
+  %12 = load i16, ptr %arrayidx20, align 2
+  %arrayidx23 = getelementptr inbounds i16, ptr %y, i64 %idxprom6
+  %13 = load i16, ptr %arrayidx23, align 2
+  %arrayidx26 = getelementptr inbounds i16, ptr %y, i64 %idxprom9
+  %14 = load i16, ptr %arrayidx26, align 2
+  %arrayidx29 = getelementptr inbounds i16, ptr %y, i64 %idxprom12
+  %15 = load i16, ptr %arrayidx29, align 2
   %mul = mul i16 %8, %0
   %mul36 = mul i16 %9, %1
   %mul42 = mul i16 %11, %3
@@ -916,269 +889,257 @@ entry:
   %mul60 = mul i16 %12, %4
   %mul66 = mul i16 %15, %7
   %mul72 = mul i16 %14, %6
-  %dst1 = getelementptr inbounds i16, i16* %dst0, i64 1
-  %dst2 = getelementptr inbounds i16, i16* %dst0, i64 2
-  %dst3 = getelementptr inbounds i16, i16* %dst0, i64 3
-  %dst4 = getelementptr inbounds i16, i16* %dst0, i64 4
-  %dst5 = getelementptr inbounds i16, i16* %dst0, i64 5
-  %dst6 = getelementptr inbounds i16, i16* %dst0, i64 6
-  %dst7 = getelementptr inbounds i16, i16* %dst0, i64 7
-  store i16 %mul, i16* %dst0
-  store i16 %mul36, i16* %dst1
-  store i16 %mul42, i16* %dst2
-  store i16 %mul48, i16* %dst3
-  store i16 %mul54, i16* %dst4
-  store i16 %mul60, i16* %dst5
-  store i16 %mul66, i16* %dst6
-  store i16 %mul72, i16* %dst7
+  %dst1 = getelementptr inbounds i16, ptr %dst0, i64 1
+  %dst2 = getelementptr inbounds i16, ptr %dst0, i64 2
+  %dst3 = getelementptr inbounds i16, ptr %dst0, i64 3
+  %dst4 = getelementptr inbounds i16, ptr %dst0, i64 4
+  %dst5 = getelementptr inbounds i16, ptr %dst0, i64 5
+  %dst6 = getelementptr inbounds i16, ptr %dst0, i64 6
+  %dst7 = getelementptr inbounds i16, ptr %dst0, i64 7
+  store i16 %mul, ptr %dst0
+  store i16 %mul36, ptr %dst1
+  store i16 %mul42, ptr %dst2
+  store i16 %mul48, ptr %dst3
+  store i16 %mul54, ptr %dst4
+  store i16 %mul60, ptr %dst5
+  store i16 %mul66, ptr %dst6
+  store i16 %mul72, ptr %dst7
   ret void
 }
 
-define void @store_blockstrided4x4(i8* nocapture noundef readonly %p1, i32 noundef %off1, i8* nocapture noundef readonly %p2, i32 noundef %off2, i32 *%dst0) {
+define void @store_blockstrided4x4(ptr nocapture noundef readonly %p1, i32 noundef %off1, ptr nocapture noundef readonly %p2, i32 noundef %off2, ptr %dst0) {
 ; CHECK-LABEL: @store_blockstrided4x4(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[OFF1:%.*]] to i64
 ; CHECK-NEXT:    [[IDX_EXT63:%.*]] = sext i32 [[OFF2:%.*]] to i64
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[P1:%.*]], i64 4
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[P2:%.*]], i64 4
-; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[P1]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[ADD_PTR64:%.*]] = getelementptr inbounds i8, i8* [[P2]], i64 [[IDX_EXT63]]
-; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR]], i64 4
-; CHECK-NEXT:    [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64]], i64 4
-; CHECK-NEXT:    [[DST4:%.*]] = getelementptr inbounds i32, i32* [[DST0:%.*]], i64 4
-; CHECK-NEXT:    [[DST8:%.*]] = getelementptr inbounds i32, i32* [[DST0]], i64 8
-; CHECK-NEXT:    [[DST12:%.*]] = getelementptr inbounds i32, i32* [[DST0]], i64 12
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[P1]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 1
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[P1:%.*]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[P2:%.*]], i64 4
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[ADD_PTR64:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IDX_EXT63]]
+; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 4
+; CHECK-NEXT:    [[DST4:%.*]] = getelementptr inbounds i32, ptr [[DST0:%.*]], i64 4
+; CHECK-NEXT:    [[DST8:%.*]] = getelementptr inbounds i32, ptr [[DST0]], i64 8
+; CHECK-NEXT:    [[DST12:%.*]] = getelementptr inbounds i32, ptr [[DST0]], i64 12
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[P1]], align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[ARRAYIDX3]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i8>, <4 x i8>* [[TMP3]], align 1
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = mul nuw nsw <4 x i32> [[TMP2]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[DST0]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8* [[P2]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP9:%.*]] = load <4 x i8>, <4 x i8>* [[TMP8]], align 1
+; CHECK-NEXT:    [[TMP9:%.*]] = load <4 x i8>, ptr [[P2]], align 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = zext <4 x i8> [[TMP9]] to <4 x i32>
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i8* [[ARRAYIDX5]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP12:%.*]] = load <4 x i8>, <4 x i8>* [[TMP11]], align 1
+; CHECK-NEXT:    [[TMP12:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1
 ; CHECK-NEXT:    [[TMP13:%.*]] = zext <4 x i8> [[TMP12]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP14:%.*]] = mul nuw nsw <4 x i32> [[TMP10]], [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = bitcast i32* [[DST4]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i8* [[ADD_PTR]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP17:%.*]] = load <4 x i8>, <4 x i8>* [[TMP16]], align 1
+; CHECK-NEXT:    [[TMP17:%.*]] = load <4 x i8>, ptr [[ADD_PTR]], align 1
 ; CHECK-NEXT:    [[TMP18:%.*]] = zext <4 x i8> [[TMP17]] to <4 x i32>
-; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i8* [[ARRAYIDX3_1]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP20:%.*]] = load <4 x i8>, <4 x i8>* [[TMP19]], align 1
+; CHECK-NEXT:    [[TMP20:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1
 ; CHECK-NEXT:    [[TMP21:%.*]] = zext <4 x i8> [[TMP20]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP22:%.*]] = mul nuw nsw <4 x i32> [[TMP18]], [[TMP21]]
-; CHECK-NEXT:    [[TMP23:%.*]] = bitcast i32* [[DST8]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP24:%.*]] = bitcast i8* [[ADD_PTR64]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP25:%.*]] = load <4 x i8>, <4 x i8>* [[TMP24]], align 1
+; CHECK-NEXT:    [[TMP25:%.*]] = load <4 x i8>, ptr [[ADD_PTR64]], align 1
 ; CHECK-NEXT:    [[TMP26:%.*]] = zext <4 x i8> [[TMP25]] to <4 x i32>
-; CHECK-NEXT:    [[TMP27:%.*]] = bitcast i8* [[ARRAYIDX5_1]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP28:%.*]] = load <4 x i8>, <4 x i8>* [[TMP27]], align 1
+; CHECK-NEXT:    [[TMP28:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1
 ; CHECK-NEXT:    [[TMP29:%.*]] = zext <4 x i8> [[TMP28]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP30:%.*]] = mul nuw nsw <4 x i32> [[TMP26]], [[TMP29]]
-; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
-; CHECK-NEXT:    store <4 x i32> [[TMP14]], <4 x i32>* [[TMP15]], align 4
-; CHECK-NEXT:    store <4 x i32> [[TMP22]], <4 x i32>* [[TMP23]], align 4
-; CHECK-NEXT:    [[TMP31:%.*]] = bitcast i32* [[DST12]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP30]], <4 x i32>* [[TMP31]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr [[DST0]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP14]], ptr [[DST4]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP22]], ptr [[DST8]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP30]], ptr [[DST12]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
   %idx.ext = sext i32 %off1 to i64
   %idx.ext63 = sext i32 %off2 to i64
 
-  %0 = load i8, i8* %p1, align 1
+  %0 = load i8, ptr %p1, align 1
   %conv = zext i8 %0 to i32
-  %1 = load i8, i8* %p2, align 1
+  %1 = load i8, ptr %p2, align 1
   %conv2 = zext i8 %1 to i32
-  %arrayidx3 = getelementptr inbounds i8, i8* %p1, i64 4
-  %2 = load i8, i8* %arrayidx3, align 1
+  %arrayidx3 = getelementptr inbounds i8, ptr %p1, i64 4
+  %2 = load i8, ptr %arrayidx3, align 1
   %conv4 = zext i8 %2 to i32
-  %arrayidx5 = getelementptr inbounds i8, i8* %p2, i64 4
-  %3 = load i8, i8* %arrayidx5, align 1
+  %arrayidx5 = getelementptr inbounds i8, ptr %p2, i64 4
+  %3 = load i8, ptr %arrayidx5, align 1
   %conv6 = zext i8 %3 to i32
-  %arrayidx8 = getelementptr inbounds i8, i8* %p1, i64 1
-  %4 = load i8, i8* %arrayidx8, align 1
+  %arrayidx8 = getelementptr inbounds i8, ptr %p1, i64 1
+  %4 = load i8, ptr %arrayidx8, align 1
   %conv9 = zext i8 %4 to i32
-  %arrayidx10 = getelementptr inbounds i8, i8* %p2, i64 1
-  %5 = load i8, i8* %arrayidx10, align 1
+  %arrayidx10 = getelementptr inbounds i8, ptr %p2, i64 1
+  %5 = load i8, ptr %arrayidx10, align 1
   %conv11 = zext i8 %5 to i32
-  %arrayidx13 = getelementptr inbounds i8, i8* %p1, i64 5
-  %6 = load i8, i8* %arrayidx13, align 1
+  %arrayidx13 = getelementptr inbounds i8, ptr %p1, i64 5
+  %6 = load i8, ptr %arrayidx13, align 1
   %conv14 = zext i8 %6 to i32
-  %arrayidx15 = getelementptr inbounds i8, i8* %p2, i64 5
-  %7 = load i8, i8* %arrayidx15, align 1
+  %arrayidx15 = getelementptr inbounds i8, ptr %p2, i64 5
+  %7 = load i8, ptr %arrayidx15, align 1
   %conv16 = zext i8 %7 to i32
-  %arrayidx20 = getelementptr inbounds i8, i8* %p1, i64 2
-  %8 = load i8, i8* %arrayidx20, align 1
+  %arrayidx20 = getelementptr inbounds i8, ptr %p1, i64 2
+  %8 = load i8, ptr %arrayidx20, align 1
   %conv21 = zext i8 %8 to i32
-  %arrayidx22 = getelementptr inbounds i8, i8* %p2, i64 2
-  %9 = load i8, i8* %arrayidx22, align 1
+  %arrayidx22 = getelementptr inbounds i8, ptr %p2, i64 2
+  %9 = load i8, ptr %arrayidx22, align 1
   %conv23 = zext i8 %9 to i32
-  %arrayidx25 = getelementptr inbounds i8, i8* %p1, i64 6
-  %10 = load i8, i8* %arrayidx25, align 1
+  %arrayidx25 = getelementptr inbounds i8, ptr %p1, i64 6
+  %10 = load i8, ptr %arrayidx25, align 1
   %conv26 = zext i8 %10 to i32
-  %arrayidx27 = getelementptr inbounds i8, i8* %p2, i64 6
-  %11 = load i8, i8* %arrayidx27, align 1
+  %arrayidx27 = getelementptr inbounds i8, ptr %p2, i64 6
+  %11 = load i8, ptr %arrayidx27, align 1
   %conv28 = zext i8 %11 to i32
-  %arrayidx32 = getelementptr inbounds i8, i8* %p1, i64 3
-  %12 = load i8, i8* %arrayidx32, align 1
+  %arrayidx32 = getelementptr inbounds i8, ptr %p1, i64 3
+  %12 = load i8, ptr %arrayidx32, align 1
   %conv33 = zext i8 %12 to i32
-  %arrayidx34 = getelementptr inbounds i8, i8* %p2, i64 3
-  %13 = load i8, i8* %arrayidx34, align 1
+  %arrayidx34 = getelementptr inbounds i8, ptr %p2, i64 3
+  %13 = load i8, ptr %arrayidx34, align 1
   %conv35 = zext i8 %13 to i32
-  %arrayidx37 = getelementptr inbounds i8, i8* %p1, i64 7
-  %14 = load i8, i8* %arrayidx37, align 1
+  %arrayidx37 = getelementptr inbounds i8, ptr %p1, i64 7
+  %14 = load i8, ptr %arrayidx37, align 1
   %conv38 = zext i8 %14 to i32
-  %arrayidx39 = getelementptr inbounds i8, i8* %p2, i64 7
-  %15 = load i8, i8* %arrayidx39, align 1
+  %arrayidx39 = getelementptr inbounds i8, ptr %p2, i64 7
+  %15 = load i8, ptr %arrayidx39, align 1
   %conv40 = zext i8 %15 to i32
-  %add.ptr = getelementptr inbounds i8, i8* %p1, i64 %idx.ext
-  %16 = load i8, i8* %add.ptr, align 1
+  %add.ptr = getelementptr inbounds i8, ptr %p1, i64 %idx.ext
+  %16 = load i8, ptr %add.ptr, align 1
   %conv.1 = zext i8 %16 to i32
-  %add.ptr64 = getelementptr inbounds i8, i8* %p2, i64 %idx.ext63
-  %17 = load i8, i8* %add.ptr64, align 1
+  %add.ptr64 = getelementptr inbounds i8, ptr %p2, i64 %idx.ext63
+  %17 = load i8, ptr %add.ptr64, align 1
   %conv2.1 = zext i8 %17 to i32
-  %arrayidx3.1 = getelementptr inbounds i8, i8* %add.ptr, i64 4
-  %18 = load i8, i8* %arrayidx3.1, align 1
+  %arrayidx3.1 = getelementptr inbounds i8, ptr %add.ptr, i64 4
+  %18 = load i8, ptr %arrayidx3.1, align 1
   %conv4.1 = zext i8 %18 to i32
-  %arrayidx5.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 4
-  %19 = load i8, i8* %arrayidx5.1, align 1
+  %arrayidx5.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 4
+  %19 = load i8, ptr %arrayidx5.1, align 1
   %conv6.1 = zext i8 %19 to i32
-  %arrayidx8.1 = getelementptr inbounds i8, i8* %add.ptr, i64 1
-  %20 = load i8, i8* %arrayidx8.1, align 1
+  %arrayidx8.1 = getelementptr inbounds i8, ptr %add.ptr, i64 1
+  %20 = load i8, ptr %arrayidx8.1, align 1
   %conv9.1 = zext i8 %20 to i32
-  %arrayidx10.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 1
-  %21 = load i8, i8* %arrayidx10.1, align 1
+  %arrayidx10.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 1
+  %21 = load i8, ptr %arrayidx10.1, align 1
   %conv11.1 = zext i8 %21 to i32
-  %arrayidx13.1 = getelementptr inbounds i8, i8* %add.ptr, i64 5
-  %22 = load i8, i8* %arrayidx13.1, align 1
+  %arrayidx13.1 = getelementptr inbounds i8, ptr %add.ptr, i64 5
+  %22 = load i8, ptr %arrayidx13.1, align 1
   %conv14.1 = zext i8 %22 to i32
-  %arrayidx15.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 5
-  %23 = load i8, i8* %arrayidx15.1, align 1
+  %arrayidx15.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 5
+  %23 = load i8, ptr %arrayidx15.1, align 1
   %conv16.1 = zext i8 %23 to i32
-  %arrayidx20.1 = getelementptr inbounds i8, i8* %add.ptr, i64 2
-  %24 = load i8, i8* %arrayidx20.1, align 1
+  %arrayidx20.1 = getelementptr inbounds i8, ptr %add.ptr, i64 2
+  %24 = load i8, ptr %arrayidx20.1, align 1
   %conv21.1 = zext i8 %24 to i32
-  %arrayidx22.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 2
-  %25 = load i8, i8* %arrayidx22.1, align 1
+  %arrayidx22.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 2
+  %25 = load i8, ptr %arrayidx22.1, align 1
   %conv23.1 = zext i8 %25 to i32
-  %arrayidx25.1 = getelementptr inbounds i8, i8* %add.ptr, i64 6
-  %26 = load i8, i8* %arrayidx25.1, align 1
+  %arrayidx25.1 = getelementptr inbounds i8, ptr %add.ptr, i64 6
+  %26 = load i8, ptr %arrayidx25.1, align 1
   %conv26.1 = zext i8 %26 to i32
-  %arrayidx27.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 6
-  %27 = load i8, i8* %arrayidx27.1, align 1
+  %arrayidx27.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 6
+  %27 = load i8, ptr %arrayidx27.1, align 1
   %conv28.1 = zext i8 %27 to i32
-  %arrayidx32.1 = getelementptr inbounds i8, i8* %add.ptr, i64 3
-  %28 = load i8, i8* %arrayidx32.1, align 1
+  %arrayidx32.1 = getelementptr inbounds i8, ptr %add.ptr, i64 3
+  %28 = load i8, ptr %arrayidx32.1, align 1
   %conv33.1 = zext i8 %28 to i32
-  %arrayidx34.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 3
-  %29 = load i8, i8* %arrayidx34.1, align 1
+  %arrayidx34.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 3
+  %29 = load i8, ptr %arrayidx34.1, align 1
   %conv35.1 = zext i8 %29 to i32
-  %arrayidx37.1 = getelementptr inbounds i8, i8* %add.ptr, i64 7
-  %30 = load i8, i8* %arrayidx37.1, align 1
+  %arrayidx37.1 = getelementptr inbounds i8, ptr %add.ptr, i64 7
+  %30 = load i8, ptr %arrayidx37.1, align 1
   %conv38.1 = zext i8 %30 to i32
-  %arrayidx39.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 7
-  %31 = load i8, i8* %arrayidx39.1, align 1
+  %arrayidx39.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 7
+  %31 = load i8, ptr %arrayidx39.1, align 1
   %conv40.1 = zext i8 %31 to i32
-  %add.ptr.1 = getelementptr inbounds i8, i8* %add.ptr, i64 %idx.ext
-  %32 = load i8, i8* %add.ptr.1, align 1
+  %add.ptr.1 = getelementptr inbounds i8, ptr %add.ptr, i64 %idx.ext
+  %32 = load i8, ptr %add.ptr.1, align 1
   %conv.2 = zext i8 %32 to i32
-  %add.ptr64.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 %idx.ext63
-  %33 = load i8, i8* %add.ptr64.1, align 1
+  %add.ptr64.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 %idx.ext63
+  %33 = load i8, ptr %add.ptr64.1, align 1
   %conv2.2 = zext i8 %33 to i32
-  %arrayidx3.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 4
-  %34 = load i8, i8* %arrayidx3.2, align 1
+  %arrayidx3.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 4
+  %34 = load i8, ptr %arrayidx3.2, align 1
   %conv4.2 = zext i8 %34 to i32
-  %arrayidx5.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 4
-  %35 = load i8, i8* %arrayidx5.2, align 1
+  %arrayidx5.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 4
+  %35 = load i8, ptr %arrayidx5.2, align 1
   %conv6.2 = zext i8 %35 to i32
-  %arrayidx8.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 1
-  %36 = load i8, i8* %arrayidx8.2, align 1
+  %arrayidx8.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 1
+  %36 = load i8, ptr %arrayidx8.2, align 1
   %conv9.2 = zext i8 %36 to i32
-  %arrayidx10.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 1
-  %37 = load i8, i8* %arrayidx10.2, align 1
+  %arrayidx10.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 1
+  %37 = load i8, ptr %arrayidx10.2, align 1
   %conv11.2 = zext i8 %37 to i32
-  %arrayidx13.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 5
-  %38 = load i8, i8* %arrayidx13.2, align 1
+  %arrayidx13.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 5
+  %38 = load i8, ptr %arrayidx13.2, align 1
   %conv14.2 = zext i8 %38 to i32
-  %arrayidx15.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 5
-  %39 = load i8, i8* %arrayidx15.2, align 1
+  %arrayidx15.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 5
+  %39 = load i8, ptr %arrayidx15.2, align 1
   %conv16.2 = zext i8 %39 to i32
-  %arrayidx20.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 2
-  %40 = load i8, i8* %arrayidx20.2, align 1
+  %arrayidx20.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 2
+  %40 = load i8, ptr %arrayidx20.2, align 1
   %conv21.2 = zext i8 %40 to i32
-  %arrayidx22.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 2
-  %41 = load i8, i8* %arrayidx22.2, align 1
+  %arrayidx22.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 2
+  %41 = load i8, ptr %arrayidx22.2, align 1
   %conv23.2 = zext i8 %41 to i32
-  %arrayidx25.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 6
-  %42 = load i8, i8* %arrayidx25.2, align 1
+  %arrayidx25.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 6
+  %42 = load i8, ptr %arrayidx25.2, align 1
   %conv26.2 = zext i8 %42 to i32
-  %arrayidx27.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 6
-  %43 = load i8, i8* %arrayidx27.2, align 1
+  %arrayidx27.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 6
+  %43 = load i8, ptr %arrayidx27.2, align 1
   %conv28.2 = zext i8 %43 to i32
-  %arrayidx32.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 3
-  %44 = load i8, i8* %arrayidx32.2, align 1
+  %arrayidx32.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 3
+  %44 = load i8, ptr %arrayidx32.2, align 1
   %conv33.2 = zext i8 %44 to i32
-  %arrayidx34.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 3
-  %45 = load i8, i8* %arrayidx34.2, align 1
+  %arrayidx34.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 3
+  %45 = load i8, ptr %arrayidx34.2, align 1
   %conv35.2 = zext i8 %45 to i32
-  %arrayidx37.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 7
-  %46 = load i8, i8* %arrayidx37.2, align 1
+  %arrayidx37.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 7
+  %46 = load i8, ptr %arrayidx37.2, align 1
   %conv38.2 = zext i8 %46 to i32
-  %arrayidx39.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 7
-  %47 = load i8, i8* %arrayidx39.2, align 1
+  %arrayidx39.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 7
+  %47 = load i8, ptr %arrayidx39.2, align 1
   %conv40.2 = zext i8 %47 to i32
-  %add.ptr.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 %idx.ext
-  %48 = load i8, i8* %add.ptr.2, align 1
+  %add.ptr.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 %idx.ext
+  %48 = load i8, ptr %add.ptr.2, align 1
   %conv.3 = zext i8 %48 to i32
-  %add.ptr64.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 %idx.ext63
-  %49 = load i8, i8* %add.ptr64.2, align 1
+  %add.ptr64.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 %idx.ext63
+  %49 = load i8, ptr %add.ptr64.2, align 1
   %conv2.3 = zext i8 %49 to i32
-  %arrayidx3.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 4
-  %50 = load i8, i8* %arrayidx3.3, align 1
+  %arrayidx3.3 = getelementptr inbounds i8, ptr %add.ptr.2, i64 4
+  %50 = load i8, ptr %arrayidx3.3, align 1
   %conv4.3 = zext i8 %50 to i32
-  %arrayidx5.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 4
-  %51 = load i8, i8* %arrayidx5.3, align 1
+  %arrayidx5.3 = getelementptr inbounds i8, ptr %add.ptr64.2, i64 4
+  %51 = load i8, ptr %arrayidx5.3, align 1
   %conv6.3 = zext i8 %51 to i32
-  %arrayidx8.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 1
-  %52 = load i8, i8* %arrayidx8.3, align 1
+  %arrayidx8.3 = getelementptr inbounds i8, ptr %add.ptr.2, i64 1
+  %52 = load i8, ptr %arrayidx8.3, align 1
   %conv9.3 = zext i8 %52 to i32
-  %arrayidx10.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 1
-  %53 = load i8, i8* %arrayidx10.3, align 1
+  %arrayidx10.3 = getelementptr inbounds i8, ptr %add.ptr64.2, i64 1
+  %53 = load i8, ptr %arrayidx10.3, align 1
   %conv11.3 = zext i8 %53 to i32
-  %arrayidx13.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 5
-  %54 = load i8, i8* %arrayidx13.3, align 1
+  %arrayidx13.3 = getelementptr inbounds i8, ptr %add.ptr.2, i64 5
+  %54 = load i8, ptr %arrayidx13.3, align 1
   %conv14.3 = zext i8 %54 to i32
-  %arrayidx15.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 5
-  %55 = load i8, i8* %arrayidx15.3, align 1
+  %arrayidx15.3 = getelementptr inbounds i8, ptr %add.ptr64.2, i64 5
+  %55 = load i8, ptr %arrayidx15.3, align 1
   %conv16.3 = zext i8 %55 to i32
-  %arrayidx20.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 2
-  %56 = load i8, i8* %arrayidx20.3, align 1
+  %arrayidx20.3 = getelementptr inbounds i8, ptr %add.ptr.2, i64 2
+  %56 = load i8, ptr %arrayidx20.3, align 1
   %conv21.3 = zext i8 %56 to i32
-  %arrayidx22.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 2
-  %57 = load i8, i8* %arrayidx22.3, align 1
+  %arrayidx22.3 = getelementptr inbounds i8, ptr %add.ptr64.2, i64 2
+  %57 = load i8, ptr %arrayidx22.3, align 1
   %conv23.3 = zext i8 %57 to i32
-  %arrayidx25.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 6
-  %58 = load i8, i8* %arrayidx25.3, align 1
+  %arrayidx25.3 = getelementptr inbounds i8, ptr %add.ptr.2, i64 6
+  %58 = load i8, ptr %arrayidx25.3, align 1
   %conv26.3 = zext i8 %58 to i32
-  %arrayidx27.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 6
-  %59 = load i8, i8* %arrayidx27.3, align 1
+  %arrayidx27.3 = getelementptr inbounds i8, ptr %add.ptr64.2, i64 6
+  %59 = load i8, ptr %arrayidx27.3, align 1
   %conv28.3 = zext i8 %59 to i32
-  %arrayidx32.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 3
-  %60 = load i8, i8* %arrayidx32.3, align 1
+  %arrayidx32.3 = getelementptr inbounds i8, ptr %add.ptr.2, i64 3
+  %60 = load i8, ptr %arrayidx32.3, align 1
   %conv33.3 = zext i8 %60 to i32
-  %arrayidx34.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 3
-  %61 = load i8, i8* %arrayidx34.3, align 1
+  %arrayidx34.3 = getelementptr inbounds i8, ptr %add.ptr64.2, i64 3
+  %61 = load i8, ptr %arrayidx34.3, align 1
   %conv35.3 = zext i8 %61 to i32
-  %arrayidx37.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 7
-  %62 = load i8, i8* %arrayidx37.3, align 1
+  %arrayidx37.3 = getelementptr inbounds i8, ptr %add.ptr.2, i64 7
+  %62 = load i8, ptr %arrayidx37.3, align 1
   %conv38.3 = zext i8 %62 to i32
-  %arrayidx39.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 7
-  %63 = load i8, i8* %arrayidx39.3, align 1
+  %arrayidx39.3 = getelementptr inbounds i8, ptr %add.ptr64.2, i64 7
+  %63 = load i8, ptr %arrayidx39.3, align 1
   %conv40.3 = zext i8 %63 to i32
 
   %m1 = mul i32 %conv, %conv4
@@ -1198,93 +1159,79 @@ entry:
   %m15 = mul i32 %conv23.1, %conv28.1
   %m16 = mul i32 %conv35.1, %conv40.1
 
-  %dst1 = getelementptr inbounds i32, i32* %dst0, i64 1
-  %dst2 = getelementptr inbounds i32, i32* %dst0, i64 2
-  %dst3 = getelementptr inbounds i32, i32* %dst0, i64 3
-  %dst4 = getelementptr inbounds i32, i32* %dst0, i64 4
-  %dst5 = getelementptr inbounds i32, i32* %dst0, i64 5
-  %dst6 = getelementptr inbounds i32, i32* %dst0, i64 6
-  %dst7 = getelementptr inbounds i32, i32* %dst0, i64 7
-  %dst8 = getelementptr inbounds i32, i32* %dst0, i64 8
-  %dst9 = getelementptr inbounds i32, i32* %dst0, i64 9
-  %dst10 = getelementptr inbounds i32, i32* %dst0, i64 10
-  %dst11 = getelementptr inbounds i32, i32* %dst0, i64 11
-  %dst12 = getelementptr inbounds i32, i32* %dst0, i64 12
-  %dst13 = getelementptr inbounds i32, i32* %dst0, i64 13
-  %dst14 = getelementptr inbounds i32, i32* %dst0, i64 14
-  %dst15 = getelementptr inbounds i32, i32* %dst0, i64 15
-  store i32 %m1, i32* %dst0
-  store i32 %m2, i32* %dst1
-  store i32 %m3, i32* %dst2
-  store i32 %m4, i32* %dst3
-  store i32 %m5, i32* %dst4
-  store i32 %m6, i32* %dst5
-  store i32 %m7, i32* %dst6
-  store i32 %m8, i32* %dst7
-  store i32 %m9, i32* %dst8
-  store i32 %m10, i32* %dst9
-  store i32 %m11, i32* %dst10
-  store i32 %m12, i32* %dst11
-  store i32 %m13, i32* %dst12
-  store i32 %m14, i32* %dst13
-  store i32 %m15, i32* %dst14
-  store i32 %m16, i32* %dst15
+  %dst1 = getelementptr inbounds i32, ptr %dst0, i64 1
+  %dst2 = getelementptr inbounds i32, ptr %dst0, i64 2
+  %dst3 = getelementptr inbounds i32, ptr %dst0, i64 3
+  %dst4 = getelementptr inbounds i32, ptr %dst0, i64 4
+  %dst5 = getelementptr inbounds i32, ptr %dst0, i64 5
+  %dst6 = getelementptr inbounds i32, ptr %dst0, i64 6
+  %dst7 = getelementptr inbounds i32, ptr %dst0, i64 7
+  %dst8 = getelementptr inbounds i32, ptr %dst0, i64 8
+  %dst9 = getelementptr inbounds i32, ptr %dst0, i64 9
+  %dst10 = getelementptr inbounds i32, ptr %dst0, i64 10
+  %dst11 = getelementptr inbounds i32, ptr %dst0, i64 11
+  %dst12 = getelementptr inbounds i32, ptr %dst0, i64 12
+  %dst13 = getelementptr inbounds i32, ptr %dst0, i64 13
+  %dst14 = getelementptr inbounds i32, ptr %dst0, i64 14
+  %dst15 = getelementptr inbounds i32, ptr %dst0, i64 15
+  store i32 %m1, ptr %dst0
+  store i32 %m2, ptr %dst1
+  store i32 %m3, ptr %dst2
+  store i32 %m4, ptr %dst3
+  store i32 %m5, ptr %dst4
+  store i32 %m6, ptr %dst5
+  store i32 %m7, ptr %dst6
+  store i32 %m8, ptr %dst7
+  store i32 %m9, ptr %dst8
+  store i32 %m10, ptr %dst9
+  store i32 %m11, ptr %dst10
+  store i32 %m12, ptr %dst11
+  store i32 %m13, ptr %dst12
+  store i32 %m14, ptr %dst13
+  store i32 %m15, ptr %dst14
+  store i32 %m16, ptr %dst15
   ret void
 }
 
-define dso_local i32 @full(i8* nocapture noundef readonly %p1, i32 noundef %st1, i8* nocapture noundef readonly %p2, i32 noundef %st2) {
+define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1, ptr nocapture noundef readonly %p2, i32 noundef %st2) {
 ; CHECK-LABEL: @full(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[ST1:%.*]] to i64
 ; CHECK-NEXT:    [[IDX_EXT63:%.*]] = sext i32 [[ST2:%.*]] to i64
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[P1:%.*]], i64 4
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[P2:%.*]], i64 4
-; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[P1]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[ADD_PTR64:%.*]] = getelementptr inbounds i8, i8* [[P2]], i64 [[IDX_EXT63]]
-; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR]], i64 4
-; CHECK-NEXT:    [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64]], i64 4
-; CHECK-NEXT:    [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[ADD_PTR64_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64]], i64 [[IDX_EXT63]]
-; CHECK-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR_1]], i64 4
-; CHECK-NEXT:    [[ARRAYIDX5_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64_1]], i64 4
-; CHECK-NEXT:    [[ADD_PTR_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR_1]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[ADD_PTR64_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64_1]], i64 [[IDX_EXT63]]
-; CHECK-NEXT:    [[ARRAYIDX3_3:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR_2]], i64 4
-; CHECK-NEXT:    [[ARRAYIDX5_3:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64_2]], i64 4
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[P1]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 1
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[P2]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* [[TMP2]], align 1
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[ARRAYIDX3]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP4]], align 1
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[ARRAYIDX5]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i8>, <4 x i8>* [[TMP6]], align 1
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8* [[ADD_PTR]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP9:%.*]] = load <4 x i8>, <4 x i8>* [[TMP8]], align 1
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8* [[ADD_PTR64]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP11:%.*]] = load <4 x i8>, <4 x i8>* [[TMP10]], align 1
-; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8* [[ARRAYIDX3_1]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP13:%.*]] = load <4 x i8>, <4 x i8>* [[TMP12]], align 1
-; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i8* [[ARRAYIDX5_1]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP15:%.*]] = load <4 x i8>, <4 x i8>* [[TMP14]], align 1
-; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i8* [[ADD_PTR_1]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP17:%.*]] = load <4 x i8>, <4 x i8>* [[TMP16]], align 1
-; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i8* [[ADD_PTR64_1]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP19:%.*]] = load <4 x i8>, <4 x i8>* [[TMP18]], align 1
-; CHECK-NEXT:    [[TMP20:%.*]] = bitcast i8* [[ARRAYIDX3_2]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP21:%.*]] = load <4 x i8>, <4 x i8>* [[TMP20]], align 1
-; CHECK-NEXT:    [[TMP22:%.*]] = bitcast i8* [[ARRAYIDX5_2]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP23:%.*]] = load <4 x i8>, <4 x i8>* [[TMP22]], align 1
-; CHECK-NEXT:    [[TMP24:%.*]] = bitcast i8* [[ADD_PTR_2]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP25:%.*]] = load <4 x i8>, <4 x i8>* [[TMP24]], align 1
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[P1:%.*]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[P2:%.*]], i64 4
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[ADD_PTR64:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IDX_EXT63]]
+; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 4
+; CHECK-NEXT:    [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[ADD_PTR64_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]]
+; CHECK-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX5_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 4
+; CHECK-NEXT:    [[ADD_PTR_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[ADD_PTR64_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 [[IDX_EXT63]]
+; CHECK-NEXT:    [[ARRAYIDX3_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_2]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX5_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_2]], i64 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[P1]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr [[P2]], align 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1
+; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1
+; CHECK-NEXT:    [[TMP9:%.*]] = load <4 x i8>, ptr [[ADD_PTR]], align 1
+; CHECK-NEXT:    [[TMP11:%.*]] = load <4 x i8>, ptr [[ADD_PTR64]], align 1
+; CHECK-NEXT:    [[TMP13:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1
+; CHECK-NEXT:    [[TMP15:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1
+; CHECK-NEXT:    [[TMP17:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1
+; CHECK-NEXT:    [[TMP19:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1
+; CHECK-NEXT:    [[TMP21:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1
+; CHECK-NEXT:    [[TMP23:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1
+; CHECK-NEXT:    [[TMP25:%.*]] = load <4 x i8>, ptr [[ADD_PTR_2]], align 1
 ; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <4 x i8> [[TMP25]], <4 x i8> [[TMP17]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP28:%.*]] = shufflevector <16 x i8> [[TMP26]], <16 x i8> [[TMP27]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP29:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP30:%.*]] = shufflevector <16 x i8> [[TMP28]], <16 x i8> [[TMP29]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
 ; CHECK-NEXT:    [[TMP31:%.*]] = zext <16 x i8> [[TMP30]] to <16 x i32>
-; CHECK-NEXT:    [[TMP32:%.*]] = bitcast i8* [[ADD_PTR64_2]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP33:%.*]] = load <4 x i8>, <4 x i8>* [[TMP32]], align 1
+; CHECK-NEXT:    [[TMP33:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_2]], align 1
 ; CHECK-NEXT:    [[TMP34:%.*]] = shufflevector <4 x i8> [[TMP33]], <4 x i8> [[TMP19]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP35:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP36:%.*]] = shufflevector <16 x i8> [[TMP34]], <16 x i8> [[TMP35]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -1292,16 +1239,14 @@ define dso_local i32 @full(i8* nocapture noundef readonly %p1, i32 noundef %st1,
 ; CHECK-NEXT:    [[TMP38:%.*]] = shufflevector <16 x i8> [[TMP36]], <16 x i8> [[TMP37]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
 ; CHECK-NEXT:    [[TMP39:%.*]] = zext <16 x i8> [[TMP38]] to <16 x i32>
 ; CHECK-NEXT:    [[TMP40:%.*]] = sub nsw <16 x i32> [[TMP31]], [[TMP39]]
-; CHECK-NEXT:    [[TMP41:%.*]] = bitcast i8* [[ARRAYIDX3_3]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP42:%.*]] = load <4 x i8>, <4 x i8>* [[TMP41]], align 1
+; CHECK-NEXT:    [[TMP42:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_3]], align 1
 ; CHECK-NEXT:    [[TMP43:%.*]] = shufflevector <4 x i8> [[TMP42]], <4 x i8> [[TMP21]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP44:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP45:%.*]] = shufflevector <16 x i8> [[TMP43]], <16 x i8> [[TMP44]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP46:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP47:%.*]] = shufflevector <16 x i8> [[TMP45]], <16 x i8> [[TMP46]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
 ; CHECK-NEXT:    [[TMP48:%.*]] = zext <16 x i8> [[TMP47]] to <16 x i32>
-; CHECK-NEXT:    [[TMP49:%.*]] = bitcast i8* [[ARRAYIDX5_3]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP50:%.*]] = load <4 x i8>, <4 x i8>* [[TMP49]], align 1
+; CHECK-NEXT:    [[TMP50:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1
 ; CHECK-NEXT:    [[TMP51:%.*]] = shufflevector <4 x i8> [[TMP50]], <4 x i8> [[TMP23]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP52:%.*]] = shufflevector <4 x i8> [[TMP15]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP53:%.*]] = shufflevector <16 x i8> [[TMP51]], <16 x i8> [[TMP52]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -1342,64 +1287,64 @@ define dso_local i32 @full(i8* nocapture noundef readonly %p1, i32 noundef %st1,
 entry:
   %idx.ext = sext i32 %st1 to i64
   %idx.ext63 = sext i32 %st2 to i64
-  %0 = load i8, i8* %p1, align 1
+  %0 = load i8, ptr %p1, align 1
   %conv = zext i8 %0 to i32
-  %1 = load i8, i8* %p2, align 1
+  %1 = load i8, ptr %p2, align 1
   %conv2 = zext i8 %1 to i32
   %sub = sub nsw i32 %conv, %conv2
-  %arrayidx3 = getelementptr inbounds i8, i8* %p1, i64 4
-  %2 = load i8, i8* %arrayidx3, align 1
+  %arrayidx3 = getelementptr inbounds i8, ptr %p1, i64 4
+  %2 = load i8, ptr %arrayidx3, align 1
   %conv4 = zext i8 %2 to i32
-  %arrayidx5 = getelementptr inbounds i8, i8* %p2, i64 4
-  %3 = load i8, i8* %arrayidx5, align 1
+  %arrayidx5 = getelementptr inbounds i8, ptr %p2, i64 4
+  %3 = load i8, ptr %arrayidx5, align 1
   %conv6 = zext i8 %3 to i32
   %sub7 = sub nsw i32 %conv4, %conv6
   %shl = shl nsw i32 %sub7, 16
   %add = add nsw i32 %shl, %sub
-  %arrayidx8 = getelementptr inbounds i8, i8* %p1, i64 1
-  %4 = load i8, i8* %arrayidx8, align 1
+  %arrayidx8 = getelementptr inbounds i8, ptr %p1, i64 1
+  %4 = load i8, ptr %arrayidx8, align 1
   %conv9 = zext i8 %4 to i32
-  %arrayidx10 = getelementptr inbounds i8, i8* %p2, i64 1
-  %5 = load i8, i8* %arrayidx10, align 1
+  %arrayidx10 = getelementptr inbounds i8, ptr %p2, i64 1
+  %5 = load i8, ptr %arrayidx10, align 1
   %conv11 = zext i8 %5 to i32
   %sub12 = sub nsw i32 %conv9, %conv11
-  %arrayidx13 = getelementptr inbounds i8, i8* %p1, i64 5
-  %6 = load i8, i8* %arrayidx13, align 1
+  %arrayidx13 = getelementptr inbounds i8, ptr %p1, i64 5
+  %6 = load i8, ptr %arrayidx13, align 1
   %conv14 = zext i8 %6 to i32
-  %arrayidx15 = getelementptr inbounds i8, i8* %p2, i64 5
-  %7 = load i8, i8* %arrayidx15, align 1
+  %arrayidx15 = getelementptr inbounds i8, ptr %p2, i64 5
+  %7 = load i8, ptr %arrayidx15, align 1
   %conv16 = zext i8 %7 to i32
   %sub17 = sub nsw i32 %conv14, %conv16
   %shl18 = shl nsw i32 %sub17, 16
   %add19 = add nsw i32 %shl18, %sub12
-  %arrayidx20 = getelementptr inbounds i8, i8* %p1, i64 2
-  %8 = load i8, i8* %arrayidx20, align 1
+  %arrayidx20 = getelementptr inbounds i8, ptr %p1, i64 2
+  %8 = load i8, ptr %arrayidx20, align 1
   %conv21 = zext i8 %8 to i32
-  %arrayidx22 = getelementptr inbounds i8, i8* %p2, i64 2
-  %9 = load i8, i8* %arrayidx22, align 1
+  %arrayidx22 = getelementptr inbounds i8, ptr %p2, i64 2
+  %9 = load i8, ptr %arrayidx22, align 1
   %conv23 = zext i8 %9 to i32
   %sub24 = sub nsw i32 %conv21, %conv23
-  %arrayidx25 = getelementptr inbounds i8, i8* %p1, i64 6
-  %10 = load i8, i8* %arrayidx25, align 1
+  %arrayidx25 = getelementptr inbounds i8, ptr %p1, i64 6
+  %10 = load i8, ptr %arrayidx25, align 1
   %conv26 = zext i8 %10 to i32
-  %arrayidx27 = getelementptr inbounds i8, i8* %p2, i64 6
-  %11 = load i8, i8* %arrayidx27, align 1
+  %arrayidx27 = getelementptr inbounds i8, ptr %p2, i64 6
+  %11 = load i8, ptr %arrayidx27, align 1
   %conv28 = zext i8 %11 to i32
   %sub29 = sub nsw i32 %conv26, %conv28
   %shl30 = shl nsw i32 %sub29, 16
   %add31 = add nsw i32 %shl30, %sub24
-  %arrayidx32 = getelementptr inbounds i8, i8* %p1, i64 3
-  %12 = load i8, i8* %arrayidx32, align 1
+  %arrayidx32 = getelementptr inbounds i8, ptr %p1, i64 3
+  %12 = load i8, ptr %arrayidx32, align 1
   %conv33 = zext i8 %12 to i32
-  %arrayidx34 = getelementptr inbounds i8, i8* %p2, i64 3
-  %13 = load i8, i8* %arrayidx34, align 1
+  %arrayidx34 = getelementptr inbounds i8, ptr %p2, i64 3
+  %13 = load i8, ptr %arrayidx34, align 1
   %conv35 = zext i8 %13 to i32
   %sub36 = sub nsw i32 %conv33, %conv35
-  %arrayidx37 = getelementptr inbounds i8, i8* %p1, i64 7
-  %14 = load i8, i8* %arrayidx37, align 1
+  %arrayidx37 = getelementptr inbounds i8, ptr %p1, i64 7
+  %14 = load i8, ptr %arrayidx37, align 1
   %conv38 = zext i8 %14 to i32
-  %arrayidx39 = getelementptr inbounds i8, i8* %p2, i64 7
-  %15 = load i8, i8* %arrayidx39, align 1
+  %arrayidx39 = getelementptr inbounds i8, ptr %p2, i64 7
+  %15 = load i8, ptr %arrayidx39, align 1
   %conv40 = zext i8 %15 to i32
   %sub41 = sub nsw i32 %conv38, %conv40
   %shl42 = shl nsw i32 %sub41, 16
@@ -1412,66 +1357,66 @@ entry:
   %sub51 = sub nsw i32 %add44, %add46
   %add55 = add nsw i32 %sub47, %sub45
   %sub59 = sub nsw i32 %sub45, %sub47
-  %add.ptr = getelementptr inbounds i8, i8* %p1, i64 %idx.ext
-  %add.ptr64 = getelementptr inbounds i8, i8* %p2, i64 %idx.ext63
-  %16 = load i8, i8* %add.ptr, align 1
+  %add.ptr = getelementptr inbounds i8, ptr %p1, i64 %idx.ext
+  %add.ptr64 = getelementptr inbounds i8, ptr %p2, i64 %idx.ext63
+  %16 = load i8, ptr %add.ptr, align 1
   %conv.1 = zext i8 %16 to i32
-  %17 = load i8, i8* %add.ptr64, align 1
+  %17 = load i8, ptr %add.ptr64, align 1
   %conv2.1 = zext i8 %17 to i32
   %sub.1 = sub nsw i32 %conv.1, %conv2.1
-  %arrayidx3.1 = getelementptr inbounds i8, i8* %add.ptr, i64 4
-  %18 = load i8, i8* %arrayidx3.1, align 1
+  %arrayidx3.1 = getelementptr inbounds i8, ptr %add.ptr, i64 4
+  %18 = load i8, ptr %arrayidx3.1, align 1
   %conv4.1 = zext i8 %18 to i32
-  %arrayidx5.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 4
-  %19 = load i8, i8* %arrayidx5.1, align 1
+  %arrayidx5.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 4
+  %19 = load i8, ptr %arrayidx5.1, align 1
   %conv6.1 = zext i8 %19 to i32
   %sub7.1 = sub nsw i32 %conv4.1, %conv6.1
   %shl.1 = shl nsw i32 %sub7.1, 16
   %add.1 = add nsw i32 %shl.1, %sub.1
-  %arrayidx8.1 = getelementptr inbounds i8, i8* %add.ptr, i64 1
-  %20 = load i8, i8* %arrayidx8.1, align 1
+  %arrayidx8.1 = getelementptr inbounds i8, ptr %add.ptr, i64 1
+  %20 = load i8, ptr %arrayidx8.1, align 1
   %conv9.1 = zext i8 %20 to i32
-  %arrayidx10.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 1
-  %21 = load i8, i8* %arrayidx10.1, align 1
+  %arrayidx10.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 1
+  %21 = load i8, ptr %arrayidx10.1, align 1
   %conv11.1 = zext i8 %21 to i32
   %sub12.1 = sub nsw i32 %conv9.1, %conv11.1
-  %arrayidx13.1 = getelementptr inbounds i8, i8* %add.ptr, i64 5
-  %22 = load i8, i8* %arrayidx13.1, align 1
+  %arrayidx13.1 = getelementptr inbounds i8, ptr %add.ptr, i64 5
+  %22 = load i8, ptr %arrayidx13.1, align 1
   %conv14.1 = zext i8 %22 to i32
-  %arrayidx15.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 5
-  %23 = load i8, i8* %arrayidx15.1, align 1
+  %arrayidx15.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 5
+  %23 = load i8, ptr %arrayidx15.1, align 1
   %conv16.1 = zext i8 %23 to i32
   %sub17.1 = sub nsw i32 %conv14.1, %conv16.1
   %shl18.1 = shl nsw i32 %sub17.1, 16
   %add19.1 = add nsw i32 %shl18.1, %sub12.1
-  %arrayidx20.1 = getelementptr inbounds i8, i8* %add.ptr, i64 2
-  %24 = load i8, i8* %arrayidx20.1, align 1
+  %arrayidx20.1 = getelementptr inbounds i8, ptr %add.ptr, i64 2
+  %24 = load i8, ptr %arrayidx20.1, align 1
   %conv21.1 = zext i8 %24 to i32
-  %arrayidx22.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 2
-  %25 = load i8, i8* %arrayidx22.1, align 1
+  %arrayidx22.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 2
+  %25 = load i8, ptr %arrayidx22.1, align 1
   %conv23.1 = zext i8 %25 to i32
   %sub24.1 = sub nsw i32 %conv21.1, %conv23.1
-  %arrayidx25.1 = getelementptr inbounds i8, i8* %add.ptr, i64 6
-  %26 = load i8, i8* %arrayidx25.1, align 1
+  %arrayidx25.1 = getelementptr inbounds i8, ptr %add.ptr, i64 6
+  %26 = load i8, ptr %arrayidx25.1, align 1
   %conv26.1 = zext i8 %26 to i32
-  %arrayidx27.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 6
-  %27 = load i8, i8* %arrayidx27.1, align 1
+  %arrayidx27.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 6
+  %27 = load i8, ptr %arrayidx27.1, align 1
   %conv28.1 = zext i8 %27 to i32
   %sub29.1 = sub nsw i32 %conv26.1, %conv28.1
   %shl30.1 = shl nsw i32 %sub29.1, 16
   %add31.1 = add nsw i32 %shl30.1, %sub24.1
-  %arrayidx32.1 = getelementptr inbounds i8, i8* %add.ptr, i64 3
-  %28 = load i8, i8* %arrayidx32.1, align 1
+  %arrayidx32.1 = getelementptr inbounds i8, ptr %add.ptr, i64 3
+  %28 = load i8, ptr %arrayidx32.1, align 1
   %conv33.1 = zext i8 %28 to i32
-  %arrayidx34.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 3
-  %29 = load i8, i8* %arrayidx34.1, align 1
+  %arrayidx34.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 3
+  %29 = load i8, ptr %arrayidx34.1, align 1
   %conv35.1 = zext i8 %29 to i32
   %sub36.1 = sub nsw i32 %conv33.1, %conv35.1
-  %arrayidx37.1 = getelementptr inbounds i8, i8* %add.ptr, i64 7
-  %30 = load i8, i8* %arrayidx37.1, align 1
+  %arrayidx37.1 = getelementptr inbounds i8, ptr %add.ptr, i64 7
+  %30 = load i8, ptr %arrayidx37.1, align 1
   %conv38.1 = zext i8 %30 to i32
-  %arrayidx39.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 7
-  %31 = load i8, i8* %arrayidx39.1, align 1
+  %arrayidx39.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 7
+  %31 = load i8, ptr %arrayidx39.1, align 1
   %conv40.1 = zext i8 %31 to i32
   %sub41.1 = sub nsw i32 %conv38.1, %conv40.1
   %shl42.1 = shl nsw i32 %sub41.1, 16
@@ -1484,66 +1429,66 @@ entry:
   %sub51.1 = sub nsw i32 %add44.1, %add46.1
   %add55.1 = add nsw i32 %sub47.1, %sub45.1
   %sub59.1 = sub nsw i32 %sub45.1, %sub47.1
-  %add.ptr.1 = getelementptr inbounds i8, i8* %add.ptr, i64 %idx.ext
-  %add.ptr64.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 %idx.ext63
-  %32 = load i8, i8* %add.ptr.1, align 1
+  %add.ptr.1 = getelementptr inbounds i8, ptr %add.ptr, i64 %idx.ext
+  %add.ptr64.1 = getelementptr inbounds i8, ptr %add.ptr64, i64 %idx.ext63
+  %32 = load i8, ptr %add.ptr.1, align 1
   %conv.2 = zext i8 %32 to i32
-  %33 = load i8, i8* %add.ptr64.1, align 1
+  %33 = load i8, ptr %add.ptr64.1, align 1
   %conv2.2 = zext i8 %33 to i32
   %sub.2 = sub nsw i32 %conv.2, %conv2.2
-  %arrayidx3.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 4
-  %34 = load i8, i8* %arrayidx3.2, align 1
+  %arrayidx3.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 4
+  %34 = load i8, ptr %arrayidx3.2, align 1
   %conv4.2 = zext i8 %34 to i32
-  %arrayidx5.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 4
-  %35 = load i8, i8* %arrayidx5.2, align 1
+  %arrayidx5.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 4
+  %35 = load i8, ptr %arrayidx5.2, align 1
   %conv6.2 = zext i8 %35 to i32
   %sub7.2 = sub nsw i32 %conv4.2, %conv6.2
   %shl.2 = shl nsw i32 %sub7.2, 16
   %add.2 = add nsw i32 %shl.2, %sub.2
-  %arrayidx8.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 1
-  %36 = load i8, i8* %arrayidx8.2, align 1
+  %arrayidx8.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 1
+  %36 = load i8, ptr %arrayidx8.2, align 1
   %conv9.2 = zext i8 %36 to i32
-  %arrayidx10.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 1
-  %37 = load i8, i8* %arrayidx10.2, align 1
+  %arrayidx10.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 1
+  %37 = load i8, ptr %arrayidx10.2, align 1
   %conv11.2 = zext i8 %37 to i32
   %sub12.2 = sub nsw i32 %conv9.2, %conv11.2
-  %arrayidx13.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 5
-  %38 = load i8, i8* %arrayidx13.2, align 1
+  %arrayidx13.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 5
+  %38 = load i8, ptr %arrayidx13.2, align 1
   %conv14.2 = zext i8 %38 to i32
-  %arrayidx15.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 5
-  %39 = load i8, i8* %arrayidx15.2, align 1
+  %arrayidx15.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 5
+  %39 = load i8, ptr %arrayidx15.2, align 1
   %conv16.2 = zext i8 %39 to i32
   %sub17.2 = sub nsw i32 %conv14.2, %conv16.2
   %shl18.2 = shl nsw i32 %sub17.2, 16
   %add19.2 = add nsw i32 %shl18.2, %sub12.2
-  %arrayidx20.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 2
-  %40 = load i8, i8* %arrayidx20.2, align 1
+  %arrayidx20.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 2
+  %40 = load i8, ptr %arrayidx20.2, align 1
   %conv21.2 = zext i8 %40 to i32
-  %arrayidx22.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 2
-  %41 = load i8, i8* %arrayidx22.2, align 1
+  %arrayidx22.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 2
+  %41 = load i8, ptr %arrayidx22.2, align 1
   %conv23.2 = zext i8 %41 to i32
   %sub24.2 = sub nsw i32 %conv21.2, %conv23.2
-  %arrayidx25.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 6
-  %42 = load i8, i8* %arrayidx25.2, align 1
+  %arrayidx25.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 6
+  %42 = load i8, ptr %arrayidx25.2, align 1
   %conv26.2 = zext i8 %42 to i32
-  %arrayidx27.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 6
-  %43 = load i8, i8* %arrayidx27.2, align 1
+  %arrayidx27.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 6
+  %43 = load i8, ptr %arrayidx27.2, align 1
   %conv28.2 = zext i8 %43 to i32
   %sub29.2 = sub nsw i32 %conv26.2, %conv28.2
   %shl30.2 = shl nsw i32 %sub29.2, 16
   %add31.2 = add nsw i32 %shl30.2, %sub24.2
-  %arrayidx32.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 3
-  %44 = load i8, i8* %arrayidx32.2, align 1
+  %arrayidx32.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 3
+  %44 = load i8, ptr %arrayidx32.2, align 1
   %conv33.2 = zext i8 %44 to i32
-  %arrayidx34.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 3
-  %45 = load i8, i8* %arrayidx34.2, align 1
+  %arrayidx34.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 3
+  %45 = load i8, ptr %arrayidx34.2, align 1
   %conv35.2 = zext i8 %45 to i32
   %sub36.2 = sub nsw i32 %conv33.2, %conv35.2
-  %arrayidx37.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 7
-  %46 = load i8, i8* %arrayidx37.2, align 1
+  %arrayidx37.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 7
+  %46 = load i8, ptr %arrayidx37.2, align 1
   %conv38.2 = zext i8 %46 to i32
-  %arrayidx39.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 7
-  %47 = load i8, i8* %arrayidx39.2, align 1
+  %arrayidx39.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 7
+  %47 = load i8, ptr %arrayidx39.2, align 1
   %conv40.2 = zext i8 %47 to i32
   %sub41.2 = sub nsw i32 %conv38.2, %conv40.2
   %shl42.2 = shl nsw i32 %sub41.2, 16
@@ -1556,66 +1501,66 @@ entry:
   %sub51.2 = sub nsw i32 %add44.2, %add46.2
   %add55.2 = add nsw i32 %sub47.2, %sub45.2
   %sub59.2 = sub nsw i32 %sub45.2, %sub47.2
-  %add.ptr.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 %idx.ext
-  %add.ptr64.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 %idx.ext63
-  %48 = load i8, i8* %add.ptr.2, align 1
+  %add.ptr.2 = getelementptr inbounds i8, ptr %add.ptr.1, i64 %idx.ext
+  %add.ptr64.2 = getelementptr inbounds i8, ptr %add.ptr64.1, i64 %idx.ext63
+  %48 = load i8, ptr %add.ptr.2, align 1
   %conv.3 = zext i8 %48 to i32
-  %49 = load i8, i8* %add.ptr64.2, align 1
+  %49 = load i8, ptr %add.ptr64.2, align 1
   %conv2.3 = zext i8 %49 to i32
   %sub.3 = sub nsw i32 %conv.3, %conv2.3
-  %arrayidx3.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 4
-  %50 = load i8, i8* %arrayidx3.3, align 1
+  %arrayidx3.3 = getelementptr inbounds i8, ptr %add.ptr.2, i64 4
+  %50 = load i8, ptr %arrayidx3.3, align 1
   %conv4.3 = zext i8 %50 to i32
-  %arrayidx5.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 4
-  %51 = load i8, i8* %arrayidx5.3, align 1
+  %arrayidx5.3 = getelementptr inbounds i8, ptr %add.ptr64.2, i64 4
+  %51 = load i8, ptr %arrayidx5.3, align 1
   %conv6.3 = zext i8 %51 to i32
   %sub7.3 = sub nsw i32 %conv4.3, %conv6.3
   %shl.3 = shl nsw i32 %sub7.3, 16
   %add.3 = add nsw i32 %shl.3, %sub.3
-  %arrayidx8.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 1
-  %52 = load i8, i8* %arrayidx8.3, align 1
+  %arrayidx8.3 = getelementptr inbounds i8, ptr %add.ptr.2, i64 1
+  %52 = load i8, ptr %arrayidx8.3, align 1
   %conv9.3 = zext i8 %52 to i32
-  %arrayidx10.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 1
-  %53 = load i8, i8* %arrayidx10.3, align 1
+  %arrayidx10.3 = getelementptr inbounds i8, ptr %add.ptr64.2, i64 1
+  %53 = load i8, ptr %arrayidx10.3, align 1
   %conv11.3 = zext i8 %53 to i32
   %sub12.3 = sub nsw i32 %conv9.3, %conv11.3
-  %arrayidx13.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 5
-  %54 = load i8, i8* %arrayidx13.3, align 1
+  %arrayidx13.3 = getelementptr inbounds i8, ptr %add.ptr.2, i64 5
+  %54 = load i8, ptr %arrayidx13.3, align 1
   %conv14.3 = zext i8 %54 to i32
-  %arrayidx15.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 5
-  %55 = load i8, i8* %arrayidx15.3, align 1
+  %arrayidx15.3 = getelementptr inbounds i8, ptr %add.ptr64.2, i64 5
+  %55 = load i8, ptr %arrayidx15.3, align 1
   %conv16.3 = zext i8 %55 to i32
   %sub17.3 = sub nsw i32 %conv14.3, %conv16.3
   %shl18.3 = shl nsw i32 %sub17.3, 16
   %add19.3 = add nsw i32 %shl18.3, %sub12.3
-  %arrayidx20.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 2
-  %56 = load i8, i8* %arrayidx20.3, align 1
+  %arrayidx20.3 = getelementptr inbounds i8, ptr %add.ptr.2, i64 2
+  %56 = load i8, ptr %arrayidx20.3, align 1
   %conv21.3 = zext i8 %56 to i32
-  %arrayidx22.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 2
-  %57 = load i8, i8* %arrayidx22.3, align 1
+  %arrayidx22.3 = getelementptr inbounds i8, ptr %add.ptr64.2, i64 2
+  %57 = load i8, ptr %arrayidx22.3, align 1
   %conv23.3 = zext i8 %57 to i32
   %sub24.3 = sub nsw i32 %conv21.3, %conv23.3
-  %arrayidx25.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 6
-  %58 = load i8, i8* %arrayidx25.3, align 1
+  %arrayidx25.3 = getelementptr inbounds i8, ptr %add.ptr.2, i64 6
+  %58 = load i8, ptr %arrayidx25.3, align 1
   %conv26.3 = zext i8 %58 to i32
-  %arrayidx27.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 6
-  %59 = load i8, i8* %arrayidx27.3, align 1
+  %arrayidx27.3 = getelementptr inbounds i8, ptr %add.ptr64.2, i64 6
+  %59 = load i8, ptr %arrayidx27.3, align 1
   %conv28.3 = zext i8 %59 to i32
   %sub29.3 = sub nsw i32 %conv26.3, %conv28.3
   %shl30.3 = shl nsw i32 %sub29.3, 16
   %add31.3 = add nsw i32 %shl30.3, %sub24.3
-  %arrayidx32.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 3
-  %60 = load i8, i8* %arrayidx32.3, align 1
+  %arrayidx32.3 = getelementptr inbounds i8, ptr %add.ptr.2, i64 3
+  %60 = load i8, ptr %arrayidx32.3, align 1
   %conv33.3 = zext i8 %60 to i32
-  %arrayidx34.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 3
-  %61 = load i8, i8* %arrayidx34.3, align 1
+  %arrayidx34.3 = getelementptr inbounds i8, ptr %add.ptr64.2, i64 3
+  %61 = load i8, ptr %arrayidx34.3, align 1
   %conv35.3 = zext i8 %61 to i32
   %sub36.3 = sub nsw i32 %conv33.3, %conv35.3
-  %arrayidx37.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 7
-  %62 = load i8, i8* %arrayidx37.3, align 1
+  %arrayidx37.3 = getelementptr inbounds i8, ptr %add.ptr.2, i64 7
+  %62 = load i8, ptr %arrayidx37.3, align 1
   %conv38.3 = zext i8 %62 to i32
-  %arrayidx39.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 7
-  %63 = load i8, i8* %arrayidx39.3, align 1
+  %arrayidx39.3 = getelementptr inbounds i8, ptr %add.ptr64.2, i64 7
+  %63 = load i8, ptr %arrayidx39.3, align 1
   %conv40.3 = zext i8 %63 to i32
   %sub41.3 = sub nsw i32 %conv38.3, %conv40.3
   %shl42.3 = shl nsw i32 %sub41.3, 16

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/matmul.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/matmul.ll
index 9889d8d9e444a..37384259e015c 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/matmul.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/matmul.ll
@@ -8,44 +8,36 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 ; The operations here are expected to be vectorized to <2 x double>.
 ; Otherwise, performance will suffer on Cortex-A53.
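 ;
 ; A plausible C source for this kernel (an illustrative sketch only; the
 ; signature and the 2x2 * 2x4 layout are inferred from the IR below, not
 ; taken from the original test):
 ;
 ;   void wrap_mul4(double *Out, const double A[2][2], const double B[2][4]) {
 ;     for (int i = 0; i < 2; i++)       /* rows of A and Out */
 ;       for (int j = 0; j < 4; j++)     /* columns of B and Out */
 ;         Out[i * 4 + j] = A[i][0] * B[0][j] + A[i][1] * B[1][j];
 ;   }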
 
-define void @wrap_mul4(double* nocapture %Out, [2 x double]* nocapture readonly %A, [4 x double]* nocapture readonly %B) {
+define void @wrap_mul4(ptr nocapture %Out, ptr nocapture readonly %A, ptr nocapture readonly %B) {
 ; CHECK-LABEL: @wrap_mul4(
-; CHECK-NEXT:    [[ARRAYIDX1_I:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[A:%.*]], i64 0, i64 0
-; CHECK-NEXT:    [[TEMP:%.*]] = load double, double* [[ARRAYIDX1_I]], align 8
-; CHECK-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds [4 x double], [4 x double]* [[B:%.*]], i64 0, i64 0
-; CHECK-NEXT:    [[ARRAYIDX5_I:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[A]], i64 0, i64 1
-; CHECK-NEXT:    [[TEMP2:%.*]] = load double, double* [[ARRAYIDX5_I]], align 8
-; CHECK-NEXT:    [[ARRAYIDX7_I:%.*]] = getelementptr inbounds [4 x double], [4 x double]* [[B]], i64 1, i64 0
-; CHECK-NEXT:    [[ARRAYIDX25_I:%.*]] = getelementptr inbounds [4 x double], [4 x double]* [[B]], i64 0, i64 2
-; CHECK-NEXT:    [[ARRAYIDX30_I:%.*]] = getelementptr inbounds [4 x double], [4 x double]* [[B]], i64 1, i64 2
-; CHECK-NEXT:    [[ARRAYIDX47_I:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[A]], i64 1, i64 0
-; CHECK-NEXT:    [[TEMP10:%.*]] = load double, double* [[ARRAYIDX47_I]], align 8
-; CHECK-NEXT:    [[ARRAYIDX52_I:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[A]], i64 1, i64 1
-; CHECK-NEXT:    [[TEMP11:%.*]] = load double, double* [[ARRAYIDX52_I]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[ARRAYIDX3_I]] to <2 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
+; CHECK-NEXT:    [[TEMP:%.*]] = load double, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[ARRAYIDX5_I:%.*]] = getelementptr inbounds [2 x double], ptr [[A]], i64 0, i64 1
+; CHECK-NEXT:    [[TEMP2:%.*]] = load double, ptr [[ARRAYIDX5_I]], align 8
+; CHECK-NEXT:    [[ARRAYIDX7_I:%.*]] = getelementptr inbounds [4 x double], ptr [[B:%.*]], i64 1, i64 0
+; CHECK-NEXT:    [[ARRAYIDX25_I:%.*]] = getelementptr inbounds [4 x double], ptr [[B]], i64 0, i64 2
+; CHECK-NEXT:    [[ARRAYIDX30_I:%.*]] = getelementptr inbounds [4 x double], ptr [[B]], i64 1, i64 2
+; CHECK-NEXT:    [[ARRAYIDX47_I:%.*]] = getelementptr inbounds [2 x double], ptr [[A]], i64 1, i64 0
+; CHECK-NEXT:    [[TEMP10:%.*]] = load double, ptr [[ARRAYIDX47_I]], align 8
+; CHECK-NEXT:    [[ARRAYIDX52_I:%.*]] = getelementptr inbounds [2 x double], ptr [[A]], i64 1, i64 1
+; CHECK-NEXT:    [[TEMP11:%.*]] = load double, ptr [[ARRAYIDX52_I]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[B]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TEMP]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[SHUFFLE]], [[TMP2]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[ARRAYIDX7_I]] to <2 x double>*
-; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x double>, ptr [[ARRAYIDX7_I]], align 8
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TEMP2]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[SHUFFLE1]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> [[TMP4]], [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast double* [[OUT:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[RES_I_SROA_5_0_OUT2_I_SROA_IDX4:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 2
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast double* [[ARRAYIDX25_I]] to <2 x double>*
-; CHECK-NEXT:    [[TMP12:%.*]] = load <2 x double>, <2 x double>* [[TMP11]], align 8
+; CHECK-NEXT:    [[RES_I_SROA_5_0_OUT2_I_SROA_IDX4:%.*]] = getelementptr inbounds double, ptr [[OUT:%.*]], i64 2
+; CHECK-NEXT:    [[TMP12:%.*]] = load <2 x double>, ptr [[ARRAYIDX25_I]], align 8
 ; CHECK-NEXT:    [[TMP13:%.*]] = fmul <2 x double> [[SHUFFLE]], [[TMP12]]
-; CHECK-NEXT:    [[TMP14:%.*]] = bitcast double* [[ARRAYIDX30_I]] to <2 x double>*
-; CHECK-NEXT:    [[TMP15:%.*]] = load <2 x double>, <2 x double>* [[TMP14]], align 8
+; CHECK-NEXT:    [[TMP15:%.*]] = load <2 x double>, ptr [[ARRAYIDX30_I]], align 8
 ; CHECK-NEXT:    [[TMP16:%.*]] = fmul <2 x double> [[SHUFFLE1]], [[TMP15]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = fadd <2 x double> [[TMP13]], [[TMP16]]
-; CHECK-NEXT:    store <2 x double> [[TMP9]], <2 x double>* [[TMP10]], align 8
-; CHECK-NEXT:    [[TMP18:%.*]] = bitcast double* [[RES_I_SROA_5_0_OUT2_I_SROA_IDX4]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP17]], <2 x double>* [[TMP18]], align 8
-; CHECK-NEXT:    [[RES_I_SROA_7_0_OUT2_I_SROA_IDX8:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 4
+; CHECK-NEXT:    store <2 x double> [[TMP9]], ptr [[OUT]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP17]], ptr [[RES_I_SROA_5_0_OUT2_I_SROA_IDX4]], align 8
+; CHECK-NEXT:    [[RES_I_SROA_7_0_OUT2_I_SROA_IDX8:%.*]] = getelementptr inbounds double, ptr [[OUT]], i64 4
 ; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <2 x double> poison, double [[TEMP10]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE4:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP20:%.*]] = fmul <2 x double> [[TMP2]], [[SHUFFLE4]]
@@ -53,53 +45,49 @@ define void @wrap_mul4(double* nocapture %Out, [2 x double]* nocapture readonly
 ; CHECK-NEXT:    [[SHUFFLE5:%.*]] = shufflevector <2 x double> [[TMP21]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = fmul <2 x double> [[TMP6]], [[SHUFFLE5]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = fadd <2 x double> [[TMP20]], [[TMP22]]
-; CHECK-NEXT:    [[TMP24:%.*]] = bitcast double* [[RES_I_SROA_7_0_OUT2_I_SROA_IDX8]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP23]], <2 x double>* [[TMP24]], align 8
-; CHECK-NEXT:    [[RES_I_SROA_9_0_OUT2_I_SROA_IDX12:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 6
+; CHECK-NEXT:    store <2 x double> [[TMP23]], ptr [[RES_I_SROA_7_0_OUT2_I_SROA_IDX8]], align 8
+; CHECK-NEXT:    [[RES_I_SROA_9_0_OUT2_I_SROA_IDX12:%.*]] = getelementptr inbounds double, ptr [[OUT]], i64 6
 ; CHECK-NEXT:    [[TMP25:%.*]] = fmul <2 x double> [[TMP12]], [[SHUFFLE4]]
 ; CHECK-NEXT:    [[TMP26:%.*]] = fmul <2 x double> [[TMP15]], [[SHUFFLE5]]
 ; CHECK-NEXT:    [[TMP27:%.*]] = fadd <2 x double> [[TMP25]], [[TMP26]]
-; CHECK-NEXT:    [[TMP28:%.*]] = bitcast double* [[RES_I_SROA_9_0_OUT2_I_SROA_IDX12]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP27]], <2 x double>* [[TMP28]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP27]], ptr [[RES_I_SROA_9_0_OUT2_I_SROA_IDX12]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %arrayidx1.i = getelementptr inbounds [2 x double], [2 x double]* %A, i64 0, i64 0
-  %temp = load double, double* %arrayidx1.i, align 8
-  %arrayidx3.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 0, i64 0
-  %temp1 = load double, double* %arrayidx3.i, align 8
+  %temp = load double, ptr %A, align 8
+  %temp1 = load double, ptr %B, align 8
   %mul.i = fmul double %temp, %temp1
-  %arrayidx5.i = getelementptr inbounds [2 x double], [2 x double]* %A, i64 0, i64 1
-  %temp2 = load double, double* %arrayidx5.i, align 8
-  %arrayidx7.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 1, i64 0
-  %temp3 = load double, double* %arrayidx7.i, align 8
+  %arrayidx5.i = getelementptr inbounds [2 x double], ptr %A, i64 0, i64 1
+  %temp2 = load double, ptr %arrayidx5.i, align 8
+  %arrayidx7.i = getelementptr inbounds [4 x double], ptr %B, i64 1, i64 0
+  %temp3 = load double, ptr %arrayidx7.i, align 8
   %mul8.i = fmul double %temp2, %temp3
   %add.i = fadd double %mul.i, %mul8.i
-  %arrayidx13.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 0, i64 1
-  %temp4 = load double, double* %arrayidx13.i, align 8
+  %arrayidx13.i = getelementptr inbounds [4 x double], ptr %B, i64 0, i64 1
+  %temp4 = load double, ptr %arrayidx13.i, align 8
   %mul14.i = fmul double %temp, %temp4
-  %arrayidx18.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 1, i64 1
-  %temp5 = load double, double* %arrayidx18.i, align 8
+  %arrayidx18.i = getelementptr inbounds [4 x double], ptr %B, i64 1, i64 1
+  %temp5 = load double, ptr %arrayidx18.i, align 8
   %mul19.i = fmul double %temp2, %temp5
   %add20.i = fadd double %mul14.i, %mul19.i
-  %arrayidx25.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 0, i64 2
-  %temp6 = load double, double* %arrayidx25.i, align 8
+  %arrayidx25.i = getelementptr inbounds [4 x double], ptr %B, i64 0, i64 2
+  %temp6 = load double, ptr %arrayidx25.i, align 8
   %mul26.i = fmul double %temp, %temp6
-  %arrayidx30.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 1, i64 2
-  %temp7 = load double, double* %arrayidx30.i, align 8
+  %arrayidx30.i = getelementptr inbounds [4 x double], ptr %B, i64 1, i64 2
+  %temp7 = load double, ptr %arrayidx30.i, align 8
   %mul31.i = fmul double %temp2, %temp7
   %add32.i = fadd double %mul26.i, %mul31.i
-  %arrayidx37.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 0, i64 3
-  %temp8 = load double, double* %arrayidx37.i, align 8
+  %arrayidx37.i = getelementptr inbounds [4 x double], ptr %B, i64 0, i64 3
+  %temp8 = load double, ptr %arrayidx37.i, align 8
   %mul38.i = fmul double %temp, %temp8
-  %arrayidx42.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 1, i64 3
-  %temp9 = load double, double* %arrayidx42.i, align 8
+  %arrayidx42.i = getelementptr inbounds [4 x double], ptr %B, i64 1, i64 3
+  %temp9 = load double, ptr %arrayidx42.i, align 8
   %mul43.i = fmul double %temp2, %temp9
   %add44.i = fadd double %mul38.i, %mul43.i
-  %arrayidx47.i = getelementptr inbounds [2 x double], [2 x double]* %A, i64 1, i64 0
-  %temp10 = load double, double* %arrayidx47.i, align 8
+  %arrayidx47.i = getelementptr inbounds [2 x double], ptr %A, i64 1, i64 0
+  %temp10 = load double, ptr %arrayidx47.i, align 8
   %mul50.i = fmul double %temp1, %temp10
-  %arrayidx52.i = getelementptr inbounds [2 x double], [2 x double]* %A, i64 1, i64 1
-  %temp11 = load double, double* %arrayidx52.i, align 8
+  %arrayidx52.i = getelementptr inbounds [2 x double], ptr %A, i64 1, i64 1
+  %temp11 = load double, ptr %arrayidx52.i, align 8
   %mul55.i = fmul double %temp3, %temp11
   %add56.i = fadd double %mul50.i, %mul55.i
   %mul62.i = fmul double %temp4, %temp10
@@ -111,21 +99,21 @@ define void @wrap_mul4(double* nocapture %Out, [2 x double]* nocapture readonly
   %mul86.i = fmul double %temp8, %temp10
   %mul91.i = fmul double %temp9, %temp11
   %add92.i = fadd double %mul86.i, %mul91.i
-  store double %add.i, double* %Out, align 8
-  %Res.i.sroa.4.0.Out2.i.sroa_idx2 = getelementptr inbounds double, double* %Out, i64 1
-  store double %add20.i, double* %Res.i.sroa.4.0.Out2.i.sroa_idx2, align 8
-  %Res.i.sroa.5.0.Out2.i.sroa_idx4 = getelementptr inbounds double, double* %Out, i64 2
-  store double %add32.i, double* %Res.i.sroa.5.0.Out2.i.sroa_idx4, align 8
-  %Res.i.sroa.6.0.Out2.i.sroa_idx6 = getelementptr inbounds double, double* %Out, i64 3
-  store double %add44.i, double* %Res.i.sroa.6.0.Out2.i.sroa_idx6, align 8
-  %Res.i.sroa.7.0.Out2.i.sroa_idx8 = getelementptr inbounds double, double* %Out, i64 4
-  store double %add56.i, double* %Res.i.sroa.7.0.Out2.i.sroa_idx8, align 8
-  %Res.i.sroa.8.0.Out2.i.sroa_idx10 = getelementptr inbounds double, double* %Out, i64 5
-  store double %add68.i, double* %Res.i.sroa.8.0.Out2.i.sroa_idx10, align 8
-  %Res.i.sroa.9.0.Out2.i.sroa_idx12 = getelementptr inbounds double, double* %Out, i64 6
-  store double %add80.i, double* %Res.i.sroa.9.0.Out2.i.sroa_idx12, align 8
-  %Res.i.sroa.10.0.Out2.i.sroa_idx14 = getelementptr inbounds double, double* %Out, i64 7
-  store double %add92.i, double* %Res.i.sroa.10.0.Out2.i.sroa_idx14, align 8
+  store double %add.i, ptr %Out, align 8
+  %Res.i.sroa.4.0.Out2.i.sroa_idx2 = getelementptr inbounds double, ptr %Out, i64 1
+  store double %add20.i, ptr %Res.i.sroa.4.0.Out2.i.sroa_idx2, align 8
+  %Res.i.sroa.5.0.Out2.i.sroa_idx4 = getelementptr inbounds double, ptr %Out, i64 2
+  store double %add32.i, ptr %Res.i.sroa.5.0.Out2.i.sroa_idx4, align 8
+  %Res.i.sroa.6.0.Out2.i.sroa_idx6 = getelementptr inbounds double, ptr %Out, i64 3
+  store double %add44.i, ptr %Res.i.sroa.6.0.Out2.i.sroa_idx6, align 8
+  %Res.i.sroa.7.0.Out2.i.sroa_idx8 = getelementptr inbounds double, ptr %Out, i64 4
+  store double %add56.i, ptr %Res.i.sroa.7.0.Out2.i.sroa_idx8, align 8
+  %Res.i.sroa.8.0.Out2.i.sroa_idx10 = getelementptr inbounds double, ptr %Out, i64 5
+  store double %add68.i, ptr %Res.i.sroa.8.0.Out2.i.sroa_idx10, align 8
+  %Res.i.sroa.9.0.Out2.i.sroa_idx12 = getelementptr inbounds double, ptr %Out, i64 6
+  store double %add80.i, ptr %Res.i.sroa.9.0.Out2.i.sroa_idx12, align 8
+  %Res.i.sroa.10.0.Out2.i.sroa_idx14 = getelementptr inbounds double, ptr %Out, i64 7
+  store double %add92.i, ptr %Res.i.sroa.10.0.Out2.i.sroa_idx14, align 8
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks-in-loops.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks-in-loops.ll
index 260dab7b5389c..f6995cc6043c1 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks-in-loops.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks-in-loops.ll
@@ -2,43 +2,43 @@
 ; RUN: opt -passes=slp-vectorizer  -mtriple=arm64-apple-ios -S %s | FileCheck %s
 ; RUN: opt -aa-pipeline='basic-aa,scoped-noalias-aa' -passes=slp-vectorizer -mtriple=arm64-apple-darwin -S %s | FileCheck %s
 
-define void @loop1(i32* %A, i32* %B, i64 %N) {
+define void @loop1(ptr %A, ptr %B, i64 %N) {
 ; CHECK-LABEL: @loop1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[B_GEP_0:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[IV]]
-; CHECK-NEXT:    [[B_0:%.*]] = load i32, i32* [[B_GEP_0]], align 4
-; CHECK-NEXT:    [[A_GEP_0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[IV]]
-; CHECK-NEXT:    [[A_0:%.*]] = load i32, i32* [[A_GEP_0]], align 4
+; CHECK-NEXT:    [[B_GEP_0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[IV]]
+; CHECK-NEXT:    [[B_0:%.*]] = load i32, ptr [[B_GEP_0]], align 4
+; CHECK-NEXT:    [[A_GEP_0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]]
+; CHECK-NEXT:    [[A_0:%.*]] = load i32, ptr [[A_GEP_0]], align 4
 ; CHECK-NEXT:    [[ADD_0:%.*]] = add i32 [[A_0]], 20
 ; CHECK-NEXT:    [[XOR_0:%.*]] = xor i32 [[ADD_0]], [[B_0]]
-; CHECK-NEXT:    store i32 [[XOR_0]], i32* [[A_GEP_0]], align 4
+; CHECK-NEXT:    store i32 [[XOR_0]], ptr [[A_GEP_0]], align 4
 ; CHECK-NEXT:    [[IV_1:%.*]] = or i64 [[IV]], 1
-; CHECK-NEXT:    [[B_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[IV_1]]
-; CHECK-NEXT:    [[B_1:%.*]] = load i32, i32* [[B_GEP_1]], align 4
-; CHECK-NEXT:    [[A_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IV_1]]
-; CHECK-NEXT:    [[A_1:%.*]] = load i32, i32* [[A_GEP_1]], align 4
+; CHECK-NEXT:    [[B_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV_1]]
+; CHECK-NEXT:    [[B_1:%.*]] = load i32, ptr [[B_GEP_1]], align 4
+; CHECK-NEXT:    [[A_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV_1]]
+; CHECK-NEXT:    [[A_1:%.*]] = load i32, ptr [[A_GEP_1]], align 4
 ; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[A_1]], 20
 ; CHECK-NEXT:    [[XOR_1:%.*]] = xor i32 [[ADD_1]], [[B_1]]
-; CHECK-NEXT:    store i32 [[XOR_1]], i32* [[A_GEP_1]], align 4
+; CHECK-NEXT:    store i32 [[XOR_1]], ptr [[A_GEP_1]], align 4
 ; CHECK-NEXT:    [[IV_2:%.*]] = or i64 [[IV]], 2
-; CHECK-NEXT:    [[B_GEP_2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[IV_2]]
-; CHECK-NEXT:    [[B_2:%.*]] = load i32, i32* [[B_GEP_2]], align 4
-; CHECK-NEXT:    [[A_GEP_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IV_2]]
-; CHECK-NEXT:    [[A_2:%.*]] = load i32, i32* [[A_GEP_2]], align 4
+; CHECK-NEXT:    [[B_GEP_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV_2]]
+; CHECK-NEXT:    [[B_2:%.*]] = load i32, ptr [[B_GEP_2]], align 4
+; CHECK-NEXT:    [[A_GEP_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV_2]]
+; CHECK-NEXT:    [[A_2:%.*]] = load i32, ptr [[A_GEP_2]], align 4
 ; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 [[A_2]], 20
 ; CHECK-NEXT:    [[XOR_2:%.*]] = xor i32 [[ADD_2]], [[B_2]]
-; CHECK-NEXT:    store i32 [[XOR_2]], i32* [[A_GEP_2]], align 4
+; CHECK-NEXT:    store i32 [[XOR_2]], ptr [[A_GEP_2]], align 4
 ; CHECK-NEXT:    [[IV_3:%.*]] = or i64 [[IV]], 3
-; CHECK-NEXT:    [[B_GEP_3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[IV_3]]
-; CHECK-NEXT:    [[B_3:%.*]] = load i32, i32* [[B_GEP_3]], align 4
-; CHECK-NEXT:    [[A_GEP_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IV_3]]
-; CHECK-NEXT:    [[A_3:%.*]] = load i32, i32* [[A_GEP_3]], align 4
+; CHECK-NEXT:    [[B_GEP_3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV_3]]
+; CHECK-NEXT:    [[B_3:%.*]] = load i32, ptr [[B_GEP_3]], align 4
+; CHECK-NEXT:    [[A_GEP_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV_3]]
+; CHECK-NEXT:    [[A_3:%.*]] = load i32, ptr [[A_GEP_3]], align 4
 ; CHECK-NEXT:    [[ADD_3:%.*]] = add i32 [[A_3]], 20
 ; CHECK-NEXT:    [[XOR_3:%.*]] = xor i32 [[ADD_3]], [[B_3]]
-; CHECK-NEXT:    store i32 [[XOR_3]], i32* [[A_GEP_3]], align 4
+; CHECK-NEXT:    store i32 [[XOR_3]], ptr [[A_GEP_3]], align 4
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 16
 ; CHECK-NEXT:    [[COND:%.*]] = icmp ult i64 [[IV_NEXT]], [[N:%.*]]
 ; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
@@ -50,37 +50,37 @@ entry:
 
 loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-  %B.gep.0 = getelementptr inbounds i32, i32* %B, i64 %iv
-  %B.0 = load i32, i32* %B.gep.0, align 4
-  %A.gep.0 = getelementptr inbounds i32, i32* %A, i64 %iv
-  %A.0 = load i32, i32* %A.gep.0, align 4
+  %B.gep.0 = getelementptr inbounds i32, ptr %B, i64 %iv
+  %B.0 = load i32, ptr %B.gep.0, align 4
+  %A.gep.0 = getelementptr inbounds i32, ptr %A, i64 %iv
+  %A.0 = load i32, ptr %A.gep.0, align 4
   %add.0 = add i32  %A.0, 20
   %xor.0 = xor i32 %add.0, %B.0
-  store i32 %xor.0, i32* %A.gep.0, align 4
+  store i32 %xor.0, ptr %A.gep.0, align 4
   %iv.1 = or i64 %iv, 1
-  %B.gep.1 = getelementptr inbounds i32, i32* %B, i64 %iv.1
-  %B.1 = load i32, i32* %B.gep.1, align 4
-  %A.gep.1 = getelementptr inbounds i32, i32* %A, i64 %iv.1
-  %A.1 = load i32, i32* %A.gep.1, align 4
+  %B.gep.1 = getelementptr inbounds i32, ptr %B, i64 %iv.1
+  %B.1 = load i32, ptr %B.gep.1, align 4
+  %A.gep.1 = getelementptr inbounds i32, ptr %A, i64 %iv.1
+  %A.1 = load i32, ptr %A.gep.1, align 4
   %add.1 = add i32  %A.1, 20
   %xor.1 = xor i32 %add.1, %B.1
-  store i32 %xor.1, i32* %A.gep.1, align 4
+  store i32 %xor.1, ptr %A.gep.1, align 4
   %iv.2 = or i64 %iv, 2
-  %B.gep.2 = getelementptr inbounds i32, i32* %B, i64 %iv.2
-  %B.2 = load i32, i32* %B.gep.2, align 4
-  %A.gep.2 = getelementptr inbounds i32, i32* %A, i64 %iv.2
-  %A.2 = load i32, i32* %A.gep.2, align 4
+  %B.gep.2 = getelementptr inbounds i32, ptr %B, i64 %iv.2
+  %B.2 = load i32, ptr %B.gep.2, align 4
+  %A.gep.2 = getelementptr inbounds i32, ptr %A, i64 %iv.2
+  %A.2 = load i32, ptr %A.gep.2, align 4
   %add.2 = add i32  %A.2, 20
   %xor.2 = xor i32 %add.2, %B.2
-  store i32 %xor.2, i32* %A.gep.2, align 4
+  store i32 %xor.2, ptr %A.gep.2, align 4
   %iv.3 = or i64 %iv, 3
-  %B.gep.3 = getelementptr inbounds i32, i32* %B, i64 %iv.3
-  %B.3 = load i32, i32* %B.gep.3, align 4
-  %A.gep.3 = getelementptr inbounds i32, i32* %A, i64 %iv.3
-  %A.3 = load i32, i32* %A.gep.3, align 4
+  %B.gep.3 = getelementptr inbounds i32, ptr %B, i64 %iv.3
+  %B.3 = load i32, ptr %B.gep.3, align 4
+  %A.gep.3 = getelementptr inbounds i32, ptr %A, i64 %iv.3
+  %A.3 = load i32, ptr %A.gep.3, align 4
   %add.3 = add i32  %A.3, 20
   %xor.3 = xor i32 %add.3, %B.3
-  store i32 %xor.3, i32* %A.gep.3, align 4
+  store i32 %xor.3, ptr %A.gep.3, align 4
   %iv.next = add nuw nsw i64 %iv, 16
   %cond = icmp ult i64 %iv.next, %N
   br i1 %cond, label %loop, label %exit
@@ -89,7 +89,7 @@ exit:
   ret void
 }
 
-define void @loop_iv_update_at_start(float* %src, float* %dst) #0 {
+define void @loop_iv_update_at_start(ptr %src, ptr %dst) #0 {
 ; CHECK-LABEL: @loop_iv_update_at_start(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
@@ -97,36 +97,34 @@ define void @loop_iv_update_at_start(float* %src, float* %dst) #0 {
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp ult i32 [[IV]], 2000
-; CHECK-NEXT:    [[SRC_GEP_0:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 0
-; CHECK-NEXT:    [[SRC_0:%.*]] = load float, float* [[SRC_GEP_0]], align 8
+; CHECK-NEXT:    [[SRC_0:%.*]] = load float, ptr [[SRC:%.*]], align 8
 ; CHECK-NEXT:    [[ADD_0:%.*]] = fadd float [[SRC_0]], 1.000000e+00
 ; CHECK-NEXT:    [[MUL_0:%.*]] = fmul float [[ADD_0]], [[SRC_0]]
-; CHECK-NEXT:    [[DST_GEP_0:%.*]] = getelementptr inbounds float, float* [[DST:%.*]], i64 0
-; CHECK-NEXT:    store float [[MUL_0]], float* [[DST_GEP_0]], align 8
-; CHECK-NEXT:    [[SRC_GEP_1:%.*]] = getelementptr inbounds float, float* [[SRC]], i64 1
-; CHECK-NEXT:    [[SRC_1:%.*]] = load float, float* [[SRC_GEP_1]], align 8
+; CHECK-NEXT:    store float [[MUL_0]], ptr [[DST:%.*]], align 8
+; CHECK-NEXT:    [[SRC_GEP_1:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 1
+; CHECK-NEXT:    [[SRC_1:%.*]] = load float, ptr [[SRC_GEP_1]], align 8
 ; CHECK-NEXT:    [[ADD_1:%.*]] = fadd float [[SRC_1]], 1.000000e+00
 ; CHECK-NEXT:    [[MUL_1:%.*]] = fmul float [[ADD_1]], [[SRC_1]]
-; CHECK-NEXT:    [[DST_GEP_1:%.*]] = getelementptr inbounds float, float* [[DST]], i64 1
-; CHECK-NEXT:    store float [[MUL_1]], float* [[DST_GEP_1]], align 8
-; CHECK-NEXT:    [[SRC_GEP_2:%.*]] = getelementptr inbounds float, float* [[SRC]], i64 2
-; CHECK-NEXT:    [[SRC_2:%.*]] = load float, float* [[SRC_GEP_2]], align 8
+; CHECK-NEXT:    [[DST_GEP_1:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 1
+; CHECK-NEXT:    store float [[MUL_1]], ptr [[DST_GEP_1]], align 8
+; CHECK-NEXT:    [[SRC_GEP_2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 2
+; CHECK-NEXT:    [[SRC_2:%.*]] = load float, ptr [[SRC_GEP_2]], align 8
 ; CHECK-NEXT:    [[ADD_2:%.*]] = fadd float [[SRC_2]], 1.000000e+00
 ; CHECK-NEXT:    [[MUL_2:%.*]] = fmul float [[ADD_2]], [[SRC_2]]
-; CHECK-NEXT:    [[DST_GEP_2:%.*]] = getelementptr inbounds float, float* [[DST]], i64 2
-; CHECK-NEXT:    store float [[MUL_2]], float* [[DST_GEP_2]], align 8
-; CHECK-NEXT:    [[SRC_GEP_3:%.*]] = getelementptr inbounds float, float* [[SRC]], i64 3
-; CHECK-NEXT:    [[SRC_3:%.*]] = load float, float* [[SRC_GEP_3]], align 8
+; CHECK-NEXT:    [[DST_GEP_2:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 2
+; CHECK-NEXT:    store float [[MUL_2]], ptr [[DST_GEP_2]], align 8
+; CHECK-NEXT:    [[SRC_GEP_3:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3
+; CHECK-NEXT:    [[SRC_3:%.*]] = load float, ptr [[SRC_GEP_3]], align 8
 ; CHECK-NEXT:    [[ADD_3:%.*]] = fadd float [[SRC_3]], 1.000000e+00
 ; CHECK-NEXT:    [[MUL_3:%.*]] = fmul float [[ADD_3]], [[SRC_3]]
-; CHECK-NEXT:    [[DST_GEP_3:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3
-; CHECK-NEXT:    store float [[MUL_3]], float* [[DST_GEP_3]], align 8
-; CHECK-NEXT:    [[SRC_GEP_4:%.*]] = getelementptr inbounds float, float* [[SRC]], i64 4
-; CHECK-NEXT:    [[SRC_4:%.*]] = load float, float* [[SRC_GEP_4]], align 8
+; CHECK-NEXT:    [[DST_GEP_3:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3
+; CHECK-NEXT:    store float [[MUL_3]], ptr [[DST_GEP_3]], align 8
+; CHECK-NEXT:    [[SRC_GEP_4:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 4
+; CHECK-NEXT:    [[SRC_4:%.*]] = load float, ptr [[SRC_GEP_4]], align 8
 ; CHECK-NEXT:    [[ADD_4:%.*]] = fadd float [[SRC_4]], 1.000000e+00
 ; CHECK-NEXT:    [[MUL_4:%.*]] = fmul float [[ADD_4]], [[SRC_4]]
-; CHECK-NEXT:    [[DST_GEP_4:%.*]] = getelementptr inbounds float, float* [[DST]], i64 4
-; CHECK-NEXT:    store float [[MUL_4]], float* [[DST_GEP_4]], align 8
+; CHECK-NEXT:    [[DST_GEP_4:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 4
+; CHECK-NEXT:    store float [[MUL_4]], ptr [[DST_GEP_4]], align 8
 ; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
@@ -139,37 +137,35 @@ loop:
   %iv.next = add i32 %iv, 1
   %cond = icmp ult i32 %iv, 2000
 
-  %src.gep.0 = getelementptr inbounds float, float* %src, i64 0
-  %src.0 = load float, float* %src.gep.0, align 8
+  %src.0 = load float, ptr %src, align 8
   %add.0 = fadd float %src.0, 1.0
   %mul.0 = fmul float %add.0, %src.0
-  %dst.gep.0 = getelementptr inbounds float, float* %dst, i64 0
-  store float %mul.0, float* %dst.gep.0, align 8
+  store float %mul.0, ptr %dst, align 8
 
-  %src.gep.1 = getelementptr inbounds float, float* %src, i64 1
-  %src.1 = load float, float* %src.gep.1, align 8
+  %src.gep.1 = getelementptr inbounds float, ptr %src, i64 1
+  %src.1 = load float, ptr %src.gep.1, align 8
   %add.1 = fadd float %src.1, 1.0
   %mul.1 = fmul float %add.1, %src.1
-  %dst.gep.1 = getelementptr inbounds float, float* %dst, i64 1
-  store float %mul.1, float* %dst.gep.1, align 8
-  %src.gep.2 = getelementptr inbounds float, float* %src, i64 2
-  %src.2 = load float, float* %src.gep.2, align 8
+  %dst.gep.1 = getelementptr inbounds float, ptr %dst, i64 1
+  store float %mul.1, ptr %dst.gep.1, align 8
+  %src.gep.2 = getelementptr inbounds float, ptr %src, i64 2
+  %src.2 = load float, ptr %src.gep.2, align 8
   %add.2 = fadd float %src.2, 1.0
   %mul.2 = fmul float %add.2, %src.2
-  %dst.gep.2 = getelementptr inbounds float, float* %dst, i64 2
-  store float %mul.2, float* %dst.gep.2, align 8
-  %src.gep.3 = getelementptr inbounds float, float* %src, i64 3
-  %src.3 = load float, float* %src.gep.3, align 8
+  %dst.gep.2 = getelementptr inbounds float, ptr %dst, i64 2
+  store float %mul.2, ptr %dst.gep.2, align 8
+  %src.gep.3 = getelementptr inbounds float, ptr %src, i64 3
+  %src.3 = load float, ptr %src.gep.3, align 8
   %add.3 = fadd float %src.3, 1.0
   %mul.3 = fmul float %add.3, %src.3
-  %dst.gep.3 = getelementptr inbounds float, float* %dst, i64 3
-  store float %mul.3, float* %dst.gep.3, align 8
-  %src.gep.4 = getelementptr inbounds float, float* %src, i64 4
-  %src.4 = load float, float* %src.gep.4, align 8
+  %dst.gep.3 = getelementptr inbounds float, ptr %dst, i64 3
+  store float %mul.3, ptr %dst.gep.3, align 8
+  %src.gep.4 = getelementptr inbounds float, ptr %src, i64 4
+  %src.4 = load float, ptr %src.gep.4, align 8
   %add.4 = fadd float %src.4, 1.0
   %mul.4 = fmul float %add.4, %src.4
-  %dst.gep.4 = getelementptr inbounds float, float* %dst, i64 4
-  store float %mul.4, float* %dst.gep.4, align 8
+  %dst.gep.4 = getelementptr inbounds float, ptr %dst, i64 4
+  store float %mul.4, ptr %dst.gep.4, align 8
   br i1 %cond, label %loop, label %exit
 
 exit:
@@ -177,44 +173,44 @@ exit:
 }
 
 ; Similar to @loop1, but a load is used in a phi in the same basic block.
-define i32 @value_used_in_phi(i32* %A, i32* %B, i64 %N) {
+define i32 @value_used_in_phi(ptr %A, ptr %B, i64 %N) {
 ; CHECK-LABEL: @value_used_in_phi(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[P:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[A_3:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[B_GEP_0:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[IV]]
-; CHECK-NEXT:    [[B_0:%.*]] = load i32, i32* [[B_GEP_0]], align 4
-; CHECK-NEXT:    [[A_GEP_0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[IV]]
-; CHECK-NEXT:    [[A_0:%.*]] = load i32, i32* [[A_GEP_0]], align 4
+; CHECK-NEXT:    [[B_GEP_0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[IV]]
+; CHECK-NEXT:    [[B_0:%.*]] = load i32, ptr [[B_GEP_0]], align 4
+; CHECK-NEXT:    [[A_GEP_0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]]
+; CHECK-NEXT:    [[A_0:%.*]] = load i32, ptr [[A_GEP_0]], align 4
 ; CHECK-NEXT:    [[ADD_0:%.*]] = add i32 [[A_0]], 20
 ; CHECK-NEXT:    [[XOR_0:%.*]] = xor i32 [[ADD_0]], [[B_0]]
-; CHECK-NEXT:    store i32 [[XOR_0]], i32* [[A_GEP_0]], align 4
+; CHECK-NEXT:    store i32 [[XOR_0]], ptr [[A_GEP_0]], align 4
 ; CHECK-NEXT:    [[IV_1:%.*]] = or i64 [[IV]], 1
-; CHECK-NEXT:    [[B_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[IV_1]]
-; CHECK-NEXT:    [[B_1:%.*]] = load i32, i32* [[B_GEP_1]], align 4
-; CHECK-NEXT:    [[A_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IV_1]]
-; CHECK-NEXT:    [[A_1:%.*]] = load i32, i32* [[A_GEP_1]], align 4
+; CHECK-NEXT:    [[B_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV_1]]
+; CHECK-NEXT:    [[B_1:%.*]] = load i32, ptr [[B_GEP_1]], align 4
+; CHECK-NEXT:    [[A_GEP_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV_1]]
+; CHECK-NEXT:    [[A_1:%.*]] = load i32, ptr [[A_GEP_1]], align 4
 ; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[A_1]], 20
 ; CHECK-NEXT:    [[XOR_1:%.*]] = xor i32 [[ADD_1]], [[B_1]]
-; CHECK-NEXT:    store i32 [[XOR_1]], i32* [[A_GEP_1]], align 4
+; CHECK-NEXT:    store i32 [[XOR_1]], ptr [[A_GEP_1]], align 4
 ; CHECK-NEXT:    [[IV_2:%.*]] = or i64 [[IV]], 2
-; CHECK-NEXT:    [[B_GEP_2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[IV_2]]
-; CHECK-NEXT:    [[B_2:%.*]] = load i32, i32* [[B_GEP_2]], align 4
-; CHECK-NEXT:    [[A_GEP_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IV_2]]
-; CHECK-NEXT:    [[A_2:%.*]] = load i32, i32* [[A_GEP_2]], align 4
+; CHECK-NEXT:    [[B_GEP_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV_2]]
+; CHECK-NEXT:    [[B_2:%.*]] = load i32, ptr [[B_GEP_2]], align 4
+; CHECK-NEXT:    [[A_GEP_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV_2]]
+; CHECK-NEXT:    [[A_2:%.*]] = load i32, ptr [[A_GEP_2]], align 4
 ; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 [[A_2]], 20
 ; CHECK-NEXT:    [[XOR_2:%.*]] = xor i32 [[ADD_2]], [[B_2]]
-; CHECK-NEXT:    store i32 [[XOR_2]], i32* [[A_GEP_2]], align 4
+; CHECK-NEXT:    store i32 [[XOR_2]], ptr [[A_GEP_2]], align 4
 ; CHECK-NEXT:    [[IV_3:%.*]] = or i64 [[IV]], 3
-; CHECK-NEXT:    [[B_GEP_3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[IV_3]]
-; CHECK-NEXT:    [[B_3:%.*]] = load i32, i32* [[B_GEP_3]], align 4
-; CHECK-NEXT:    [[A_GEP_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IV_3]]
-; CHECK-NEXT:    [[A_3]] = load i32, i32* [[A_GEP_3]], align 4
+; CHECK-NEXT:    [[B_GEP_3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV_3]]
+; CHECK-NEXT:    [[B_3:%.*]] = load i32, ptr [[B_GEP_3]], align 4
+; CHECK-NEXT:    [[A_GEP_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV_3]]
+; CHECK-NEXT:    [[A_3]] = load i32, ptr [[A_GEP_3]], align 4
 ; CHECK-NEXT:    [[ADD_3:%.*]] = add i32 [[A_3]], 20
 ; CHECK-NEXT:    [[XOR_3:%.*]] = xor i32 [[ADD_3]], [[B_3]]
-; CHECK-NEXT:    store i32 [[XOR_3]], i32* [[A_GEP_3]], align 4
+; CHECK-NEXT:    store i32 [[XOR_3]], ptr [[A_GEP_3]], align 4
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 16
 ; CHECK-NEXT:    [[COND:%.*]] = icmp ult i64 [[IV_NEXT]], [[N:%.*]]
 ; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
@@ -227,37 +223,37 @@ entry:
 loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
   %p = phi i32 [ 0, %entry ], [ %A.3, %loop ]
-  %B.gep.0 = getelementptr inbounds i32, i32* %B, i64 %iv
-  %B.0 = load i32, i32* %B.gep.0, align 4
-  %A.gep.0 = getelementptr inbounds i32, i32* %A, i64 %iv
-  %A.0 = load i32, i32* %A.gep.0, align 4
+  %B.gep.0 = getelementptr inbounds i32, ptr %B, i64 %iv
+  %B.0 = load i32, ptr %B.gep.0, align 4
+  %A.gep.0 = getelementptr inbounds i32, ptr %A, i64 %iv
+  %A.0 = load i32, ptr %A.gep.0, align 4
   %add.0 = add i32  %A.0, 20
   %xor.0 = xor i32 %add.0, %B.0
-  store i32 %xor.0, i32* %A.gep.0, align 4
+  store i32 %xor.0, ptr %A.gep.0, align 4
   %iv.1 = or i64 %iv, 1
-  %B.gep.1 = getelementptr inbounds i32, i32* %B, i64 %iv.1
-  %B.1 = load i32, i32* %B.gep.1, align 4
-  %A.gep.1 = getelementptr inbounds i32, i32* %A, i64 %iv.1
-  %A.1 = load i32, i32* %A.gep.1, align 4
+  %B.gep.1 = getelementptr inbounds i32, ptr %B, i64 %iv.1
+  %B.1 = load i32, ptr %B.gep.1, align 4
+  %A.gep.1 = getelementptr inbounds i32, ptr %A, i64 %iv.1
+  %A.1 = load i32, ptr %A.gep.1, align 4
   %add.1 = add i32  %A.1, 20
   %xor.1 = xor i32 %add.1, %B.1
-  store i32 %xor.1, i32* %A.gep.1, align 4
+  store i32 %xor.1, ptr %A.gep.1, align 4
   %iv.2 = or i64 %iv, 2
-  %B.gep.2 = getelementptr inbounds i32, i32* %B, i64 %iv.2
-  %B.2 = load i32, i32* %B.gep.2, align 4
-  %A.gep.2 = getelementptr inbounds i32, i32* %A, i64 %iv.2
-  %A.2 = load i32, i32* %A.gep.2, align 4
+  %B.gep.2 = getelementptr inbounds i32, ptr %B, i64 %iv.2
+  %B.2 = load i32, ptr %B.gep.2, align 4
+  %A.gep.2 = getelementptr inbounds i32, ptr %A, i64 %iv.2
+  %A.2 = load i32, ptr %A.gep.2, align 4
   %add.2 = add i32  %A.2, 20
   %xor.2 = xor i32 %add.2, %B.2
-  store i32 %xor.2, i32* %A.gep.2, align 4
+  store i32 %xor.2, ptr %A.gep.2, align 4
   %iv.3 = or i64 %iv, 3
-  %B.gep.3 = getelementptr inbounds i32, i32* %B, i64 %iv.3
-  %B.3 = load i32, i32* %B.gep.3, align 4
-  %A.gep.3 = getelementptr inbounds i32, i32* %A, i64 %iv.3
-  %A.3 = load i32, i32* %A.gep.3, align 4
+  %B.gep.3 = getelementptr inbounds i32, ptr %B, i64 %iv.3
+  %B.3 = load i32, ptr %B.gep.3, align 4
+  %A.gep.3 = getelementptr inbounds i32, ptr %A, i64 %iv.3
+  %A.3 = load i32, ptr %A.gep.3, align 4
   %add.3 = add i32  %A.3, 20
   %xor.3 = xor i32 %add.3, %B.3
-  store i32 %xor.3, i32* %A.gep.3, align 4
+  store i32 %xor.3, ptr %A.gep.3, align 4
   %iv.next = add nuw nsw i64 %iv, 16
   %cond = icmp ult i64 %iv.next, %N
   br i1 %cond, label %loop, label %exit

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/minimum-sizes.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/minimum-sizes.ll
index 84b571c975e36..920e1e64e3958 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/minimum-sizes.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/minimum-sizes.ll
@@ -39,12 +39,12 @@ for.end11:
 ; every root in the vectorizable tree when computing minimum sizes since one
 ; root may require fewer bits than another.
 ;
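 ; For example (an illustrative sketch, not taken from this test): given two
 ; roots
 ;   %t = trunc i32 %r0 to i8    ; %r0 alone would fit in 8 bits
 ;   store i32 %r1, ptr %q       ; %r1 still needs all 32 bits
 ; the tree cannot be narrowed below 32 bits, so each root must be inspected.
 ;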
-define void @PR26629(i32* %c) {
+define void @PR26629(ptr %c) {
 ; CHECK-LABEL: @PR26629(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 undef, label [[FOR_PH:%.*]], label [[FOR_END:%.*]]
 ; CHECK:       for.ph:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[C:%.*]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[D:%.*]] = phi i72 [ 576507472957710340, [[FOR_PH]] ], [ [[BF_SET17:%.*]], [[FOR_BODY]] ]
@@ -62,7 +62,7 @@ entry:
   br i1 undef, label %for.ph, label %for.end
 
 for.ph:
-  %0 = load i32, i32* %c, align 4
+  %0 = load i32, ptr %c, align 4
   br label %for.body
 
 for.body:

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/mul.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/mul.ll
index 5a4d6318796e2..4f868e3f154dc 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/mul.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/mul.ll
@@ -7,8 +7,8 @@ target triple = "aarch64--linux-gnu"
 ; These examples correspond to input code like:
 ;
 ;   void t(long * __restrict a, long * __restrict b) {
 ;     a[0] *= b[0];
 ;     a[1] *= b[1];
 ;   }
 ;
 ; If we SLP vectorise this then we end up with something like this because we
@@ -36,73 +36,73 @@ target triple = "aarch64--linux-gnu"
 ;        stp     x8, x9, [x0]
 ;        ret
 ;
-define void @mul(i64* noalias nocapture %a, i64* noalias nocapture readonly %b) {
+define void @mul(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
 ; CHECK-LABEL: @mul(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* [[B:%.*]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A:%.*]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[B:%.*]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[TMP1]], [[TMP0]]
-; CHECK-NEXT:    store i64 [[MUL]], i64* [[A]], align 8
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* [[ARRAYIDX2]], align 8
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 1
-; CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[ARRAYIDX3]], align 8
+; CHECK-NEXT:    store i64 [[MUL]], ptr [[A]], align 8
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8
 ; CHECK-NEXT:    [[MUL4:%.*]] = mul nsw i64 [[TMP3]], [[TMP2]]
-; CHECK-NEXT:    store i64 [[MUL4]], i64* [[ARRAYIDX3]], align 8
+; CHECK-NEXT:    store i64 [[MUL4]], ptr [[ARRAYIDX3]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load i64, i64* %b, align 8
-  %1 = load i64, i64* %a, align 8
+  %0 = load i64, ptr %b, align 8
+  %1 = load i64, ptr %a, align 8
   %mul = mul nsw i64 %1, %0
-  store i64 %mul, i64* %a, align 8
-  %arrayidx2 = getelementptr inbounds i64, i64* %b, i64 1
-  %2 = load i64, i64* %arrayidx2, align 8
-  %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 1
-  %3 = load i64, i64* %arrayidx3, align 8
+  store i64 %mul, ptr %a, align 8
+  %arrayidx2 = getelementptr inbounds i64, ptr %b, i64 1
+  %2 = load i64, ptr %arrayidx2, align 8
+  %arrayidx3 = getelementptr inbounds i64, ptr %a, i64 1
+  %3 = load i64, ptr %arrayidx3, align 8
   %mul4 = mul nsw i64 %3, %2
-  store i64 %mul4, i64* %arrayidx3, align 8
+  store i64 %mul4, ptr %arrayidx3, align 8
   ret void
 }
 
 ; Similar example, but now a multiply-accumulate:
 ;
 ;  void x (long * __restrict a, long * __restrict b) {
 ;    a[0] *= b[0];
 ;    a[1] *= b[1];
 ;    a[0] += b[0];
 ;    a[1] += b[1];
 ;  }
 ;
-define void @mac(i64* noalias nocapture %a, i64* noalias nocapture readonly %b) {
+define void @mac(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
 ; CHECK-LABEL: @mac(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* [[B:%.*]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A:%.*]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[B:%.*]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[TMP1]], [[TMP0]]
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* [[ARRAYIDX2]], align 8
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 1
-; CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[ARRAYIDX3]], align 8
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8
 ; CHECK-NEXT:    [[MUL4:%.*]] = mul nsw i64 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[MUL]], [[TMP0]]
-; CHECK-NEXT:    store i64 [[ADD]], i64* [[A]], align 8
+; CHECK-NEXT:    store i64 [[ADD]], ptr [[A]], align 8
 ; CHECK-NEXT:    [[ADD9:%.*]] = add nsw i64 [[MUL4]], [[TMP2]]
-; CHECK-NEXT:    store i64 [[ADD9]], i64* [[ARRAYIDX3]], align 8
+; CHECK-NEXT:    store i64 [[ADD9]], ptr [[ARRAYIDX3]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load i64, i64* %b, align 8
-  %1 = load i64, i64* %a, align 8
+  %0 = load i64, ptr %b, align 8
+  %1 = load i64, ptr %a, align 8
   %mul = mul nsw i64 %1, %0
-  %arrayidx2 = getelementptr inbounds i64, i64* %b, i64 1
-  %2 = load i64, i64* %arrayidx2, align 8
-  %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 1
-  %3 = load i64, i64* %arrayidx3, align 8
+  %arrayidx2 = getelementptr inbounds i64, ptr %b, i64 1
+  %2 = load i64, ptr %arrayidx2, align 8
+  %arrayidx3 = getelementptr inbounds i64, ptr %a, i64 1
+  %3 = load i64, ptr %arrayidx3, align 8
   %mul4 = mul nsw i64 %3, %2
   %add = add nsw i64 %mul, %0
-  store i64 %add, i64* %a, align 8
+  store i64 %add, ptr %a, align 8
   %add9 = add nsw i64 %mul4, %2
-  store i64 %add9, i64* %arrayidx3, align 8
+  store i64 %add9, ptr %arrayidx3, align 8
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll
index 8f653efc62297..7a7a6e72565f1 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll
@@ -7,390 +7,390 @@ target triple = "aarch64-arm-none-eabi"
 ; This test has mutual reductions, referencing the same data:
 ; for i = ...
 ;   sm += x[i];
 ;   sq += x[i] * x[i];
 ; It currently doesn't SLP vectorize, but should.
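 ;
 ; Roughly the shape a vectorized version could take for each 8-element
 ; block (an illustrative sketch using the standard reduction intrinsics,
 ; not the pass's actual output):
 ;   %v    = load <8 x i16>, ptr %p, align 2
 ;   %vz   = zext <8 x i16> %v to <8 x i32>
 ;   %vsq  = mul nuw nsw <8 x i32> %vz, %vz
 ;   %sm.p = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %vz)
 ;   %sq.p = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %vsq)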
 
-define i64 @straight(i16* nocapture noundef readonly %p, i32 noundef %st) {
+define i64 @straight(ptr nocapture noundef readonly %p, i32 noundef %st) {
 ; CHECK-LABEL: @straight(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[ST:%.*]] to i64
-; CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[P:%.*]], align 2
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[P:%.*]], align 2
 ; CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[TMP0]] to i32
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nuw nsw i32 [[CONV]], [[CONV]]
-; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX_1]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX_1]], align 2
 ; CHECK-NEXT:    [[CONV_1:%.*]] = zext i16 [[TMP1]] to i32
 ; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw nsw i32 [[CONV]], [[CONV_1]]
 ; CHECK-NEXT:    [[MUL_1:%.*]] = mul nuw nsw i32 [[CONV_1]], [[CONV_1]]
 ; CHECK-NEXT:    [[ADD11_1:%.*]] = add nuw i32 [[MUL_1]], [[MUL]]
-; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 2
-; CHECK-NEXT:    [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX_2]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX_2]], align 2
 ; CHECK-NEXT:    [[CONV_2:%.*]] = zext i16 [[TMP2]] to i32
 ; CHECK-NEXT:    [[ADD_2:%.*]] = add nuw nsw i32 [[ADD_1]], [[CONV_2]]
 ; CHECK-NEXT:    [[MUL_2:%.*]] = mul nuw nsw i32 [[CONV_2]], [[CONV_2]]
 ; CHECK-NEXT:    [[ADD11_2:%.*]] = add i32 [[MUL_2]], [[ADD11_1]]
-; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 3
-; CHECK-NEXT:    [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX_3]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 3
+; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX_3]], align 2
 ; CHECK-NEXT:    [[CONV_3:%.*]] = zext i16 [[TMP3]] to i32
 ; CHECK-NEXT:    [[ADD_3:%.*]] = add nuw nsw i32 [[ADD_2]], [[CONV_3]]
 ; CHECK-NEXT:    [[MUL_3:%.*]] = mul nuw nsw i32 [[CONV_3]], [[CONV_3]]
 ; CHECK-NEXT:    [[ADD11_3:%.*]] = add i32 [[MUL_3]], [[ADD11_2]]
-; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 4
-; CHECK-NEXT:    [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX_4]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX_4]], align 2
 ; CHECK-NEXT:    [[CONV_4:%.*]] = zext i16 [[TMP4]] to i32
 ; CHECK-NEXT:    [[ADD_4:%.*]] = add nuw nsw i32 [[ADD_3]], [[CONV_4]]
 ; CHECK-NEXT:    [[MUL_4:%.*]] = mul nuw nsw i32 [[CONV_4]], [[CONV_4]]
 ; CHECK-NEXT:    [[ADD11_4:%.*]] = add i32 [[MUL_4]], [[ADD11_3]]
-; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 5
-; CHECK-NEXT:    [[TMP5:%.*]] = load i16, i16* [[ARRAYIDX_5]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 5
+; CHECK-NEXT:    [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX_5]], align 2
 ; CHECK-NEXT:    [[CONV_5:%.*]] = zext i16 [[TMP5]] to i32
 ; CHECK-NEXT:    [[ADD_5:%.*]] = add nuw nsw i32 [[ADD_4]], [[CONV_5]]
 ; CHECK-NEXT:    [[MUL_5:%.*]] = mul nuw nsw i32 [[CONV_5]], [[CONV_5]]
 ; CHECK-NEXT:    [[ADD11_5:%.*]] = add i32 [[MUL_5]], [[ADD11_4]]
-; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 6
-; CHECK-NEXT:    [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX_6]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 6
+; CHECK-NEXT:    [[TMP6:%.*]] = load i16, ptr [[ARRAYIDX_6]], align 2
 ; CHECK-NEXT:    [[CONV_6:%.*]] = zext i16 [[TMP6]] to i32
 ; CHECK-NEXT:    [[ADD_6:%.*]] = add nuw nsw i32 [[ADD_5]], [[CONV_6]]
 ; CHECK-NEXT:    [[MUL_6:%.*]] = mul nuw nsw i32 [[CONV_6]], [[CONV_6]]
 ; CHECK-NEXT:    [[ADD11_6:%.*]] = add i32 [[MUL_6]], [[ADD11_5]]
-; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 7
-; CHECK-NEXT:    [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX_7]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 7
+; CHECK-NEXT:    [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX_7]], align 2
 ; CHECK-NEXT:    [[CONV_7:%.*]] = zext i16 [[TMP7]] to i32
 ; CHECK-NEXT:    [[ADD_7:%.*]] = add nuw nsw i32 [[ADD_6]], [[CONV_7]]
 ; CHECK-NEXT:    [[MUL_7:%.*]] = mul nuw nsw i32 [[CONV_7]], [[CONV_7]]
 ; CHECK-NEXT:    [[ADD11_7:%.*]] = add i32 [[MUL_7]], [[ADD11_6]]
-; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[TMP8:%.*]] = load i16, i16* [[ADD_PTR]], align 2
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i16, ptr [[ADD_PTR]], align 2
 ; CHECK-NEXT:    [[CONV_140:%.*]] = zext i16 [[TMP8]] to i32
 ; CHECK-NEXT:    [[ADD_141:%.*]] = add nuw nsw i32 [[ADD_7]], [[CONV_140]]
 ; CHECK-NEXT:    [[MUL_142:%.*]] = mul nuw nsw i32 [[CONV_140]], [[CONV_140]]
 ; CHECK-NEXT:    [[ADD11_143:%.*]] = add i32 [[MUL_142]], [[ADD11_7]]
-; CHECK-NEXT:    [[ARRAYIDX_1_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 1
-; CHECK-NEXT:    [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX_1_1]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_1_1:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR]], i64 1
+; CHECK-NEXT:    [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX_1_1]], align 2
 ; CHECK-NEXT:    [[CONV_1_1:%.*]] = zext i16 [[TMP9]] to i32
 ; CHECK-NEXT:    [[ADD_1_1:%.*]] = add nuw nsw i32 [[ADD_141]], [[CONV_1_1]]
 ; CHECK-NEXT:    [[MUL_1_1:%.*]] = mul nuw nsw i32 [[CONV_1_1]], [[CONV_1_1]]
 ; CHECK-NEXT:    [[ADD11_1_1:%.*]] = add i32 [[MUL_1_1]], [[ADD11_143]]
-; CHECK-NEXT:    [[ARRAYIDX_2_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 2
-; CHECK-NEXT:    [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX_2_1]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_2_1:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR]], i64 2
+; CHECK-NEXT:    [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX_2_1]], align 2
 ; CHECK-NEXT:    [[CONV_2_1:%.*]] = zext i16 [[TMP10]] to i32
 ; CHECK-NEXT:    [[ADD_2_1:%.*]] = add nuw nsw i32 [[ADD_1_1]], [[CONV_2_1]]
 ; CHECK-NEXT:    [[MUL_2_1:%.*]] = mul nuw nsw i32 [[CONV_2_1]], [[CONV_2_1]]
 ; CHECK-NEXT:    [[ADD11_2_1:%.*]] = add i32 [[MUL_2_1]], [[ADD11_1_1]]
-; CHECK-NEXT:    [[ARRAYIDX_3_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 3
-; CHECK-NEXT:    [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX_3_1]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_3_1:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR]], i64 3
+; CHECK-NEXT:    [[TMP11:%.*]] = load i16, ptr [[ARRAYIDX_3_1]], align 2
 ; CHECK-NEXT:    [[CONV_3_1:%.*]] = zext i16 [[TMP11]] to i32
 ; CHECK-NEXT:    [[ADD_3_1:%.*]] = add nuw nsw i32 [[ADD_2_1]], [[CONV_3_1]]
 ; CHECK-NEXT:    [[MUL_3_1:%.*]] = mul nuw nsw i32 [[CONV_3_1]], [[CONV_3_1]]
 ; CHECK-NEXT:    [[ADD11_3_1:%.*]] = add i32 [[MUL_3_1]], [[ADD11_2_1]]
-; CHECK-NEXT:    [[ARRAYIDX_4_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 4
-; CHECK-NEXT:    [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX_4_1]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_4_1:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR]], i64 4
+; CHECK-NEXT:    [[TMP12:%.*]] = load i16, ptr [[ARRAYIDX_4_1]], align 2
 ; CHECK-NEXT:    [[CONV_4_1:%.*]] = zext i16 [[TMP12]] to i32
 ; CHECK-NEXT:    [[ADD_4_1:%.*]] = add nuw nsw i32 [[ADD_3_1]], [[CONV_4_1]]
 ; CHECK-NEXT:    [[MUL_4_1:%.*]] = mul nuw nsw i32 [[CONV_4_1]], [[CONV_4_1]]
 ; CHECK-NEXT:    [[ADD11_4_1:%.*]] = add i32 [[MUL_4_1]], [[ADD11_3_1]]
-; CHECK-NEXT:    [[ARRAYIDX_5_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 5
-; CHECK-NEXT:    [[TMP13:%.*]] = load i16, i16* [[ARRAYIDX_5_1]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_5_1:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR]], i64 5
+; CHECK-NEXT:    [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX_5_1]], align 2
 ; CHECK-NEXT:    [[CONV_5_1:%.*]] = zext i16 [[TMP13]] to i32
 ; CHECK-NEXT:    [[ADD_5_1:%.*]] = add nuw nsw i32 [[ADD_4_1]], [[CONV_5_1]]
 ; CHECK-NEXT:    [[MUL_5_1:%.*]] = mul nuw nsw i32 [[CONV_5_1]], [[CONV_5_1]]
 ; CHECK-NEXT:    [[ADD11_5_1:%.*]] = add i32 [[MUL_5_1]], [[ADD11_4_1]]
-; CHECK-NEXT:    [[ARRAYIDX_6_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 6
-; CHECK-NEXT:    [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX_6_1]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_6_1:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR]], i64 6
+; CHECK-NEXT:    [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX_6_1]], align 2
 ; CHECK-NEXT:    [[CONV_6_1:%.*]] = zext i16 [[TMP14]] to i32
 ; CHECK-NEXT:    [[ADD_6_1:%.*]] = add nuw nsw i32 [[ADD_5_1]], [[CONV_6_1]]
 ; CHECK-NEXT:    [[MUL_6_1:%.*]] = mul nuw nsw i32 [[CONV_6_1]], [[CONV_6_1]]
 ; CHECK-NEXT:    [[ADD11_6_1:%.*]] = add i32 [[MUL_6_1]], [[ADD11_5_1]]
-; CHECK-NEXT:    [[ARRAYIDX_7_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 7
-; CHECK-NEXT:    [[TMP15:%.*]] = load i16, i16* [[ARRAYIDX_7_1]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_7_1:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR]], i64 7
+; CHECK-NEXT:    [[TMP15:%.*]] = load i16, ptr [[ARRAYIDX_7_1]], align 2
 ; CHECK-NEXT:    [[CONV_7_1:%.*]] = zext i16 [[TMP15]] to i32
 ; CHECK-NEXT:    [[ADD_7_1:%.*]] = add nuw nsw i32 [[ADD_6_1]], [[CONV_7_1]]
 ; CHECK-NEXT:    [[MUL_7_1:%.*]] = mul nuw nsw i32 [[CONV_7_1]], [[CONV_7_1]]
 ; CHECK-NEXT:    [[ADD11_7_1:%.*]] = add i32 [[MUL_7_1]], [[ADD11_6_1]]
-; CHECK-NEXT:    [[ADD_PTR_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load i16, i16* [[ADD_PTR_1]], align 2
+; CHECK-NEXT:    [[ADD_PTR_1:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load i16, ptr [[ADD_PTR_1]], align 2
 ; CHECK-NEXT:    [[CONV_244:%.*]] = zext i16 [[TMP16]] to i32
 ; CHECK-NEXT:    [[ADD_245:%.*]] = add nuw nsw i32 [[ADD_7_1]], [[CONV_244]]
 ; CHECK-NEXT:    [[MUL_246:%.*]] = mul nuw nsw i32 [[CONV_244]], [[CONV_244]]
 ; CHECK-NEXT:    [[ADD11_247:%.*]] = add i32 [[MUL_246]], [[ADD11_7_1]]
-; CHECK-NEXT:    [[ARRAYIDX_1_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 1
-; CHECK-NEXT:    [[TMP17:%.*]] = load i16, i16* [[ARRAYIDX_1_2]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_1_2:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_1]], i64 1
+; CHECK-NEXT:    [[TMP17:%.*]] = load i16, ptr [[ARRAYIDX_1_2]], align 2
 ; CHECK-NEXT:    [[CONV_1_2:%.*]] = zext i16 [[TMP17]] to i32
 ; CHECK-NEXT:    [[ADD_1_2:%.*]] = add nuw nsw i32 [[ADD_245]], [[CONV_1_2]]
 ; CHECK-NEXT:    [[MUL_1_2:%.*]] = mul nuw nsw i32 [[CONV_1_2]], [[CONV_1_2]]
 ; CHECK-NEXT:    [[ADD11_1_2:%.*]] = add i32 [[MUL_1_2]], [[ADD11_247]]
-; CHECK-NEXT:    [[ARRAYIDX_2_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 2
-; CHECK-NEXT:    [[TMP18:%.*]] = load i16, i16* [[ARRAYIDX_2_2]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_2_2:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_1]], i64 2
+; CHECK-NEXT:    [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX_2_2]], align 2
 ; CHECK-NEXT:    [[CONV_2_2:%.*]] = zext i16 [[TMP18]] to i32
 ; CHECK-NEXT:    [[ADD_2_2:%.*]] = add nuw nsw i32 [[ADD_1_2]], [[CONV_2_2]]
 ; CHECK-NEXT:    [[MUL_2_2:%.*]] = mul nuw nsw i32 [[CONV_2_2]], [[CONV_2_2]]
 ; CHECK-NEXT:    [[ADD11_2_2:%.*]] = add i32 [[MUL_2_2]], [[ADD11_1_2]]
-; CHECK-NEXT:    [[ARRAYIDX_3_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 3
-; CHECK-NEXT:    [[TMP19:%.*]] = load i16, i16* [[ARRAYIDX_3_2]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_3_2:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_1]], i64 3
+; CHECK-NEXT:    [[TMP19:%.*]] = load i16, ptr [[ARRAYIDX_3_2]], align 2
 ; CHECK-NEXT:    [[CONV_3_2:%.*]] = zext i16 [[TMP19]] to i32
 ; CHECK-NEXT:    [[ADD_3_2:%.*]] = add nuw nsw i32 [[ADD_2_2]], [[CONV_3_2]]
 ; CHECK-NEXT:    [[MUL_3_2:%.*]] = mul nuw nsw i32 [[CONV_3_2]], [[CONV_3_2]]
 ; CHECK-NEXT:    [[ADD11_3_2:%.*]] = add i32 [[MUL_3_2]], [[ADD11_2_2]]
-; CHECK-NEXT:    [[ARRAYIDX_4_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 4
-; CHECK-NEXT:    [[TMP20:%.*]] = load i16, i16* [[ARRAYIDX_4_2]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_4_2:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_1]], i64 4
+; CHECK-NEXT:    [[TMP20:%.*]] = load i16, ptr [[ARRAYIDX_4_2]], align 2
 ; CHECK-NEXT:    [[CONV_4_2:%.*]] = zext i16 [[TMP20]] to i32
 ; CHECK-NEXT:    [[ADD_4_2:%.*]] = add nuw nsw i32 [[ADD_3_2]], [[CONV_4_2]]
 ; CHECK-NEXT:    [[MUL_4_2:%.*]] = mul nuw nsw i32 [[CONV_4_2]], [[CONV_4_2]]
 ; CHECK-NEXT:    [[ADD11_4_2:%.*]] = add i32 [[MUL_4_2]], [[ADD11_3_2]]
-; CHECK-NEXT:    [[ARRAYIDX_5_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 5
-; CHECK-NEXT:    [[TMP21:%.*]] = load i16, i16* [[ARRAYIDX_5_2]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_5_2:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_1]], i64 5
+; CHECK-NEXT:    [[TMP21:%.*]] = load i16, ptr [[ARRAYIDX_5_2]], align 2
 ; CHECK-NEXT:    [[CONV_5_2:%.*]] = zext i16 [[TMP21]] to i32
 ; CHECK-NEXT:    [[ADD_5_2:%.*]] = add nuw nsw i32 [[ADD_4_2]], [[CONV_5_2]]
 ; CHECK-NEXT:    [[MUL_5_2:%.*]] = mul nuw nsw i32 [[CONV_5_2]], [[CONV_5_2]]
 ; CHECK-NEXT:    [[ADD11_5_2:%.*]] = add i32 [[MUL_5_2]], [[ADD11_4_2]]
-; CHECK-NEXT:    [[ARRAYIDX_6_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 6
-; CHECK-NEXT:    [[TMP22:%.*]] = load i16, i16* [[ARRAYIDX_6_2]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_6_2:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_1]], i64 6
+; CHECK-NEXT:    [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX_6_2]], align 2
 ; CHECK-NEXT:    [[CONV_6_2:%.*]] = zext i16 [[TMP22]] to i32
 ; CHECK-NEXT:    [[ADD_6_2:%.*]] = add nuw nsw i32 [[ADD_5_2]], [[CONV_6_2]]
 ; CHECK-NEXT:    [[MUL_6_2:%.*]] = mul nuw nsw i32 [[CONV_6_2]], [[CONV_6_2]]
 ; CHECK-NEXT:    [[ADD11_6_2:%.*]] = add i32 [[MUL_6_2]], [[ADD11_5_2]]
-; CHECK-NEXT:    [[ARRAYIDX_7_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 7
-; CHECK-NEXT:    [[TMP23:%.*]] = load i16, i16* [[ARRAYIDX_7_2]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_7_2:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_1]], i64 7
+; CHECK-NEXT:    [[TMP23:%.*]] = load i16, ptr [[ARRAYIDX_7_2]], align 2
 ; CHECK-NEXT:    [[CONV_7_2:%.*]] = zext i16 [[TMP23]] to i32
 ; CHECK-NEXT:    [[ADD_7_2:%.*]] = add nuw nsw i32 [[ADD_6_2]], [[CONV_7_2]]
 ; CHECK-NEXT:    [[MUL_7_2:%.*]] = mul nuw nsw i32 [[CONV_7_2]], [[CONV_7_2]]
 ; CHECK-NEXT:    [[ADD11_7_2:%.*]] = add i32 [[MUL_7_2]], [[ADD11_6_2]]
-; CHECK-NEXT:    [[ADD_PTR_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[TMP24:%.*]] = load i16, i16* [[ADD_PTR_2]], align 2
+; CHECK-NEXT:    [[ADD_PTR_2:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_1]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[TMP24:%.*]] = load i16, ptr [[ADD_PTR_2]], align 2
 ; CHECK-NEXT:    [[CONV_348:%.*]] = zext i16 [[TMP24]] to i32
 ; CHECK-NEXT:    [[ADD_349:%.*]] = add nuw nsw i32 [[ADD_7_2]], [[CONV_348]]
 ; CHECK-NEXT:    [[MUL_350:%.*]] = mul nuw nsw i32 [[CONV_348]], [[CONV_348]]
 ; CHECK-NEXT:    [[ADD11_351:%.*]] = add i32 [[MUL_350]], [[ADD11_7_2]]
-; CHECK-NEXT:    [[ARRAYIDX_1_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 1
-; CHECK-NEXT:    [[TMP25:%.*]] = load i16, i16* [[ARRAYIDX_1_3]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_1_3:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_2]], i64 1
+; CHECK-NEXT:    [[TMP25:%.*]] = load i16, ptr [[ARRAYIDX_1_3]], align 2
 ; CHECK-NEXT:    [[CONV_1_3:%.*]] = zext i16 [[TMP25]] to i32
 ; CHECK-NEXT:    [[ADD_1_3:%.*]] = add nuw nsw i32 [[ADD_349]], [[CONV_1_3]]
 ; CHECK-NEXT:    [[MUL_1_3:%.*]] = mul nuw nsw i32 [[CONV_1_3]], [[CONV_1_3]]
 ; CHECK-NEXT:    [[ADD11_1_3:%.*]] = add i32 [[MUL_1_3]], [[ADD11_351]]
-; CHECK-NEXT:    [[ARRAYIDX_2_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 2
-; CHECK-NEXT:    [[TMP26:%.*]] = load i16, i16* [[ARRAYIDX_2_3]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_2_3:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_2]], i64 2
+; CHECK-NEXT:    [[TMP26:%.*]] = load i16, ptr [[ARRAYIDX_2_3]], align 2
 ; CHECK-NEXT:    [[CONV_2_3:%.*]] = zext i16 [[TMP26]] to i32
 ; CHECK-NEXT:    [[ADD_2_3:%.*]] = add nuw nsw i32 [[ADD_1_3]], [[CONV_2_3]]
 ; CHECK-NEXT:    [[MUL_2_3:%.*]] = mul nuw nsw i32 [[CONV_2_3]], [[CONV_2_3]]
 ; CHECK-NEXT:    [[ADD11_2_3:%.*]] = add i32 [[MUL_2_3]], [[ADD11_1_3]]
-; CHECK-NEXT:    [[ARRAYIDX_3_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 3
-; CHECK-NEXT:    [[TMP27:%.*]] = load i16, i16* [[ARRAYIDX_3_3]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_3_3:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_2]], i64 3
+; CHECK-NEXT:    [[TMP27:%.*]] = load i16, ptr [[ARRAYIDX_3_3]], align 2
 ; CHECK-NEXT:    [[CONV_3_3:%.*]] = zext i16 [[TMP27]] to i32
 ; CHECK-NEXT:    [[ADD_3_3:%.*]] = add nuw nsw i32 [[ADD_2_3]], [[CONV_3_3]]
 ; CHECK-NEXT:    [[MUL_3_3:%.*]] = mul nuw nsw i32 [[CONV_3_3]], [[CONV_3_3]]
 ; CHECK-NEXT:    [[ADD11_3_3:%.*]] = add i32 [[MUL_3_3]], [[ADD11_2_3]]
-; CHECK-NEXT:    [[ARRAYIDX_4_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 4
-; CHECK-NEXT:    [[TMP28:%.*]] = load i16, i16* [[ARRAYIDX_4_3]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_4_3:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_2]], i64 4
+; CHECK-NEXT:    [[TMP28:%.*]] = load i16, ptr [[ARRAYIDX_4_3]], align 2
 ; CHECK-NEXT:    [[CONV_4_3:%.*]] = zext i16 [[TMP28]] to i32
 ; CHECK-NEXT:    [[ADD_4_3:%.*]] = add nuw nsw i32 [[ADD_3_3]], [[CONV_4_3]]
 ; CHECK-NEXT:    [[MUL_4_3:%.*]] = mul nuw nsw i32 [[CONV_4_3]], [[CONV_4_3]]
 ; CHECK-NEXT:    [[ADD11_4_3:%.*]] = add i32 [[MUL_4_3]], [[ADD11_3_3]]
-; CHECK-NEXT:    [[ARRAYIDX_5_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 5
-; CHECK-NEXT:    [[TMP29:%.*]] = load i16, i16* [[ARRAYIDX_5_3]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_5_3:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_2]], i64 5
+; CHECK-NEXT:    [[TMP29:%.*]] = load i16, ptr [[ARRAYIDX_5_3]], align 2
 ; CHECK-NEXT:    [[CONV_5_3:%.*]] = zext i16 [[TMP29]] to i32
 ; CHECK-NEXT:    [[ADD_5_3:%.*]] = add nuw nsw i32 [[ADD_4_3]], [[CONV_5_3]]
 ; CHECK-NEXT:    [[MUL_5_3:%.*]] = mul nuw nsw i32 [[CONV_5_3]], [[CONV_5_3]]
 ; CHECK-NEXT:    [[ADD11_5_3:%.*]] = add i32 [[MUL_5_3]], [[ADD11_4_3]]
-; CHECK-NEXT:    [[ARRAYIDX_6_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 6
-; CHECK-NEXT:    [[TMP30:%.*]] = load i16, i16* [[ARRAYIDX_6_3]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_6_3:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_2]], i64 6
+; CHECK-NEXT:    [[TMP30:%.*]] = load i16, ptr [[ARRAYIDX_6_3]], align 2
 ; CHECK-NEXT:    [[CONV_6_3:%.*]] = zext i16 [[TMP30]] to i32
 ; CHECK-NEXT:    [[ADD_6_3:%.*]] = add nuw nsw i32 [[ADD_5_3]], [[CONV_6_3]]
 ; CHECK-NEXT:    [[MUL_6_3:%.*]] = mul nuw nsw i32 [[CONV_6_3]], [[CONV_6_3]]
 ; CHECK-NEXT:    [[ADD11_6_3:%.*]] = add i32 [[MUL_6_3]], [[ADD11_5_3]]
-; CHECK-NEXT:    [[ARRAYIDX_7_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 7
-; CHECK-NEXT:    [[TMP31:%.*]] = load i16, i16* [[ARRAYIDX_7_3]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_7_3:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_2]], i64 7
+; CHECK-NEXT:    [[TMP31:%.*]] = load i16, ptr [[ARRAYIDX_7_3]], align 2
 ; CHECK-NEXT:    [[CONV_7_3:%.*]] = zext i16 [[TMP31]] to i32
 ; CHECK-NEXT:    [[ADD_7_3:%.*]] = add nuw nsw i32 [[ADD_6_3]], [[CONV_7_3]]
 ; CHECK-NEXT:    [[MUL_7_3:%.*]] = mul nuw nsw i32 [[CONV_7_3]], [[CONV_7_3]]
 ; CHECK-NEXT:    [[ADD11_7_3:%.*]] = add i32 [[MUL_7_3]], [[ADD11_6_3]]
-; CHECK-NEXT:    [[ADD_PTR_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[TMP32:%.*]] = load i16, i16* [[ADD_PTR_3]], align 2
+; CHECK-NEXT:    [[ADD_PTR_3:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_2]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[TMP32:%.*]] = load i16, ptr [[ADD_PTR_3]], align 2
 ; CHECK-NEXT:    [[CONV_452:%.*]] = zext i16 [[TMP32]] to i32
 ; CHECK-NEXT:    [[ADD_453:%.*]] = add nuw nsw i32 [[ADD_7_3]], [[CONV_452]]
 ; CHECK-NEXT:    [[MUL_454:%.*]] = mul nuw nsw i32 [[CONV_452]], [[CONV_452]]
 ; CHECK-NEXT:    [[ADD11_455:%.*]] = add i32 [[MUL_454]], [[ADD11_7_3]]
-; CHECK-NEXT:    [[ARRAYIDX_1_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 1
-; CHECK-NEXT:    [[TMP33:%.*]] = load i16, i16* [[ARRAYIDX_1_4]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_1_4:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_3]], i64 1
+; CHECK-NEXT:    [[TMP33:%.*]] = load i16, ptr [[ARRAYIDX_1_4]], align 2
 ; CHECK-NEXT:    [[CONV_1_4:%.*]] = zext i16 [[TMP33]] to i32
 ; CHECK-NEXT:    [[ADD_1_4:%.*]] = add nuw nsw i32 [[ADD_453]], [[CONV_1_4]]
 ; CHECK-NEXT:    [[MUL_1_4:%.*]] = mul nuw nsw i32 [[CONV_1_4]], [[CONV_1_4]]
 ; CHECK-NEXT:    [[ADD11_1_4:%.*]] = add i32 [[MUL_1_4]], [[ADD11_455]]
-; CHECK-NEXT:    [[ARRAYIDX_2_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 2
-; CHECK-NEXT:    [[TMP34:%.*]] = load i16, i16* [[ARRAYIDX_2_4]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_2_4:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_3]], i64 2
+; CHECK-NEXT:    [[TMP34:%.*]] = load i16, ptr [[ARRAYIDX_2_4]], align 2
 ; CHECK-NEXT:    [[CONV_2_4:%.*]] = zext i16 [[TMP34]] to i32
 ; CHECK-NEXT:    [[ADD_2_4:%.*]] = add nuw nsw i32 [[ADD_1_4]], [[CONV_2_4]]
 ; CHECK-NEXT:    [[MUL_2_4:%.*]] = mul nuw nsw i32 [[CONV_2_4]], [[CONV_2_4]]
 ; CHECK-NEXT:    [[ADD11_2_4:%.*]] = add i32 [[MUL_2_4]], [[ADD11_1_4]]
-; CHECK-NEXT:    [[ARRAYIDX_3_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 3
-; CHECK-NEXT:    [[TMP35:%.*]] = load i16, i16* [[ARRAYIDX_3_4]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_3_4:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_3]], i64 3
+; CHECK-NEXT:    [[TMP35:%.*]] = load i16, ptr [[ARRAYIDX_3_4]], align 2
 ; CHECK-NEXT:    [[CONV_3_4:%.*]] = zext i16 [[TMP35]] to i32
 ; CHECK-NEXT:    [[ADD_3_4:%.*]] = add nuw nsw i32 [[ADD_2_4]], [[CONV_3_4]]
 ; CHECK-NEXT:    [[MUL_3_4:%.*]] = mul nuw nsw i32 [[CONV_3_4]], [[CONV_3_4]]
 ; CHECK-NEXT:    [[ADD11_3_4:%.*]] = add i32 [[MUL_3_4]], [[ADD11_2_4]]
-; CHECK-NEXT:    [[ARRAYIDX_4_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 4
-; CHECK-NEXT:    [[TMP36:%.*]] = load i16, i16* [[ARRAYIDX_4_4]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_4_4:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_3]], i64 4
+; CHECK-NEXT:    [[TMP36:%.*]] = load i16, ptr [[ARRAYIDX_4_4]], align 2
 ; CHECK-NEXT:    [[CONV_4_4:%.*]] = zext i16 [[TMP36]] to i32
 ; CHECK-NEXT:    [[ADD_4_4:%.*]] = add nuw nsw i32 [[ADD_3_4]], [[CONV_4_4]]
 ; CHECK-NEXT:    [[MUL_4_4:%.*]] = mul nuw nsw i32 [[CONV_4_4]], [[CONV_4_4]]
 ; CHECK-NEXT:    [[ADD11_4_4:%.*]] = add i32 [[MUL_4_4]], [[ADD11_3_4]]
-; CHECK-NEXT:    [[ARRAYIDX_5_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 5
-; CHECK-NEXT:    [[TMP37:%.*]] = load i16, i16* [[ARRAYIDX_5_4]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_5_4:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_3]], i64 5
+; CHECK-NEXT:    [[TMP37:%.*]] = load i16, ptr [[ARRAYIDX_5_4]], align 2
 ; CHECK-NEXT:    [[CONV_5_4:%.*]] = zext i16 [[TMP37]] to i32
 ; CHECK-NEXT:    [[ADD_5_4:%.*]] = add nuw nsw i32 [[ADD_4_4]], [[CONV_5_4]]
 ; CHECK-NEXT:    [[MUL_5_4:%.*]] = mul nuw nsw i32 [[CONV_5_4]], [[CONV_5_4]]
 ; CHECK-NEXT:    [[ADD11_5_4:%.*]] = add i32 [[MUL_5_4]], [[ADD11_4_4]]
-; CHECK-NEXT:    [[ARRAYIDX_6_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 6
-; CHECK-NEXT:    [[TMP38:%.*]] = load i16, i16* [[ARRAYIDX_6_4]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_6_4:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_3]], i64 6
+; CHECK-NEXT:    [[TMP38:%.*]] = load i16, ptr [[ARRAYIDX_6_4]], align 2
 ; CHECK-NEXT:    [[CONV_6_4:%.*]] = zext i16 [[TMP38]] to i32
 ; CHECK-NEXT:    [[ADD_6_4:%.*]] = add nuw nsw i32 [[ADD_5_4]], [[CONV_6_4]]
 ; CHECK-NEXT:    [[MUL_6_4:%.*]] = mul nuw nsw i32 [[CONV_6_4]], [[CONV_6_4]]
 ; CHECK-NEXT:    [[ADD11_6_4:%.*]] = add i32 [[MUL_6_4]], [[ADD11_5_4]]
-; CHECK-NEXT:    [[ARRAYIDX_7_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 7
-; CHECK-NEXT:    [[TMP39:%.*]] = load i16, i16* [[ARRAYIDX_7_4]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_7_4:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_3]], i64 7
+; CHECK-NEXT:    [[TMP39:%.*]] = load i16, ptr [[ARRAYIDX_7_4]], align 2
 ; CHECK-NEXT:    [[CONV_7_4:%.*]] = zext i16 [[TMP39]] to i32
 ; CHECK-NEXT:    [[ADD_7_4:%.*]] = add nuw nsw i32 [[ADD_6_4]], [[CONV_7_4]]
 ; CHECK-NEXT:    [[MUL_7_4:%.*]] = mul nuw nsw i32 [[CONV_7_4]], [[CONV_7_4]]
 ; CHECK-NEXT:    [[ADD11_7_4:%.*]] = add i32 [[MUL_7_4]], [[ADD11_6_4]]
-; CHECK-NEXT:    [[ADD_PTR_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[TMP40:%.*]] = load i16, i16* [[ADD_PTR_4]], align 2
+; CHECK-NEXT:    [[ADD_PTR_4:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_3]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[TMP40:%.*]] = load i16, ptr [[ADD_PTR_4]], align 2
 ; CHECK-NEXT:    [[CONV_556:%.*]] = zext i16 [[TMP40]] to i32
 ; CHECK-NEXT:    [[ADD_557:%.*]] = add nuw nsw i32 [[ADD_7_4]], [[CONV_556]]
 ; CHECK-NEXT:    [[MUL_558:%.*]] = mul nuw nsw i32 [[CONV_556]], [[CONV_556]]
 ; CHECK-NEXT:    [[ADD11_559:%.*]] = add i32 [[MUL_558]], [[ADD11_7_4]]
-; CHECK-NEXT:    [[ARRAYIDX_1_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 1
-; CHECK-NEXT:    [[TMP41:%.*]] = load i16, i16* [[ARRAYIDX_1_5]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_1_5:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_4]], i64 1
+; CHECK-NEXT:    [[TMP41:%.*]] = load i16, ptr [[ARRAYIDX_1_5]], align 2
 ; CHECK-NEXT:    [[CONV_1_5:%.*]] = zext i16 [[TMP41]] to i32
 ; CHECK-NEXT:    [[ADD_1_5:%.*]] = add nuw nsw i32 [[ADD_557]], [[CONV_1_5]]
 ; CHECK-NEXT:    [[MUL_1_5:%.*]] = mul nuw nsw i32 [[CONV_1_5]], [[CONV_1_5]]
 ; CHECK-NEXT:    [[ADD11_1_5:%.*]] = add i32 [[MUL_1_5]], [[ADD11_559]]
-; CHECK-NEXT:    [[ARRAYIDX_2_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 2
-; CHECK-NEXT:    [[TMP42:%.*]] = load i16, i16* [[ARRAYIDX_2_5]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_2_5:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_4]], i64 2
+; CHECK-NEXT:    [[TMP42:%.*]] = load i16, ptr [[ARRAYIDX_2_5]], align 2
 ; CHECK-NEXT:    [[CONV_2_5:%.*]] = zext i16 [[TMP42]] to i32
 ; CHECK-NEXT:    [[ADD_2_5:%.*]] = add nuw nsw i32 [[ADD_1_5]], [[CONV_2_5]]
 ; CHECK-NEXT:    [[MUL_2_5:%.*]] = mul nuw nsw i32 [[CONV_2_5]], [[CONV_2_5]]
 ; CHECK-NEXT:    [[ADD11_2_5:%.*]] = add i32 [[MUL_2_5]], [[ADD11_1_5]]
-; CHECK-NEXT:    [[ARRAYIDX_3_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 3
-; CHECK-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX_3_5]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_3_5:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_4]], i64 3
+; CHECK-NEXT:    [[TMP43:%.*]] = load i16, ptr [[ARRAYIDX_3_5]], align 2
 ; CHECK-NEXT:    [[CONV_3_5:%.*]] = zext i16 [[TMP43]] to i32
 ; CHECK-NEXT:    [[ADD_3_5:%.*]] = add nuw nsw i32 [[ADD_2_5]], [[CONV_3_5]]
 ; CHECK-NEXT:    [[MUL_3_5:%.*]] = mul nuw nsw i32 [[CONV_3_5]], [[CONV_3_5]]
 ; CHECK-NEXT:    [[ADD11_3_5:%.*]] = add i32 [[MUL_3_5]], [[ADD11_2_5]]
-; CHECK-NEXT:    [[ARRAYIDX_4_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 4
-; CHECK-NEXT:    [[TMP44:%.*]] = load i16, i16* [[ARRAYIDX_4_5]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_4_5:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_4]], i64 4
+; CHECK-NEXT:    [[TMP44:%.*]] = load i16, ptr [[ARRAYIDX_4_5]], align 2
 ; CHECK-NEXT:    [[CONV_4_5:%.*]] = zext i16 [[TMP44]] to i32
 ; CHECK-NEXT:    [[ADD_4_5:%.*]] = add nuw nsw i32 [[ADD_3_5]], [[CONV_4_5]]
 ; CHECK-NEXT:    [[MUL_4_5:%.*]] = mul nuw nsw i32 [[CONV_4_5]], [[CONV_4_5]]
 ; CHECK-NEXT:    [[ADD11_4_5:%.*]] = add i32 [[MUL_4_5]], [[ADD11_3_5]]
-; CHECK-NEXT:    [[ARRAYIDX_5_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 5
-; CHECK-NEXT:    [[TMP45:%.*]] = load i16, i16* [[ARRAYIDX_5_5]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_5_5:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_4]], i64 5
+; CHECK-NEXT:    [[TMP45:%.*]] = load i16, ptr [[ARRAYIDX_5_5]], align 2
 ; CHECK-NEXT:    [[CONV_5_5:%.*]] = zext i16 [[TMP45]] to i32
 ; CHECK-NEXT:    [[ADD_5_5:%.*]] = add nuw nsw i32 [[ADD_4_5]], [[CONV_5_5]]
 ; CHECK-NEXT:    [[MUL_5_5:%.*]] = mul nuw nsw i32 [[CONV_5_5]], [[CONV_5_5]]
 ; CHECK-NEXT:    [[ADD11_5_5:%.*]] = add i32 [[MUL_5_5]], [[ADD11_4_5]]
-; CHECK-NEXT:    [[ARRAYIDX_6_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 6
-; CHECK-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX_6_5]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_6_5:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_4]], i64 6
+; CHECK-NEXT:    [[TMP46:%.*]] = load i16, ptr [[ARRAYIDX_6_5]], align 2
 ; CHECK-NEXT:    [[CONV_6_5:%.*]] = zext i16 [[TMP46]] to i32
 ; CHECK-NEXT:    [[ADD_6_5:%.*]] = add nuw nsw i32 [[ADD_5_5]], [[CONV_6_5]]
 ; CHECK-NEXT:    [[MUL_6_5:%.*]] = mul nuw nsw i32 [[CONV_6_5]], [[CONV_6_5]]
 ; CHECK-NEXT:    [[ADD11_6_5:%.*]] = add i32 [[MUL_6_5]], [[ADD11_5_5]]
-; CHECK-NEXT:    [[ARRAYIDX_7_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 7
-; CHECK-NEXT:    [[TMP47:%.*]] = load i16, i16* [[ARRAYIDX_7_5]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_7_5:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_4]], i64 7
+; CHECK-NEXT:    [[TMP47:%.*]] = load i16, ptr [[ARRAYIDX_7_5]], align 2
 ; CHECK-NEXT:    [[CONV_7_5:%.*]] = zext i16 [[TMP47]] to i32
 ; CHECK-NEXT:    [[ADD_7_5:%.*]] = add nuw nsw i32 [[ADD_6_5]], [[CONV_7_5]]
 ; CHECK-NEXT:    [[MUL_7_5:%.*]] = mul nuw nsw i32 [[CONV_7_5]], [[CONV_7_5]]
 ; CHECK-NEXT:    [[ADD11_7_5:%.*]] = add i32 [[MUL_7_5]], [[ADD11_6_5]]
-; CHECK-NEXT:    [[ADD_PTR_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[TMP48:%.*]] = load i16, i16* [[ADD_PTR_5]], align 2
+; CHECK-NEXT:    [[ADD_PTR_5:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_4]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[TMP48:%.*]] = load i16, ptr [[ADD_PTR_5]], align 2
 ; CHECK-NEXT:    [[CONV_660:%.*]] = zext i16 [[TMP48]] to i32
 ; CHECK-NEXT:    [[ADD_661:%.*]] = add nuw nsw i32 [[ADD_7_5]], [[CONV_660]]
 ; CHECK-NEXT:    [[MUL_662:%.*]] = mul nuw nsw i32 [[CONV_660]], [[CONV_660]]
 ; CHECK-NEXT:    [[ADD11_663:%.*]] = add i32 [[MUL_662]], [[ADD11_7_5]]
-; CHECK-NEXT:    [[ARRAYIDX_1_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 1
-; CHECK-NEXT:    [[TMP49:%.*]] = load i16, i16* [[ARRAYIDX_1_6]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_1_6:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_5]], i64 1
+; CHECK-NEXT:    [[TMP49:%.*]] = load i16, ptr [[ARRAYIDX_1_6]], align 2
 ; CHECK-NEXT:    [[CONV_1_6:%.*]] = zext i16 [[TMP49]] to i32
 ; CHECK-NEXT:    [[ADD_1_6:%.*]] = add nuw nsw i32 [[ADD_661]], [[CONV_1_6]]
 ; CHECK-NEXT:    [[MUL_1_6:%.*]] = mul nuw nsw i32 [[CONV_1_6]], [[CONV_1_6]]
 ; CHECK-NEXT:    [[ADD11_1_6:%.*]] = add i32 [[MUL_1_6]], [[ADD11_663]]
-; CHECK-NEXT:    [[ARRAYIDX_2_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 2
-; CHECK-NEXT:    [[TMP50:%.*]] = load i16, i16* [[ARRAYIDX_2_6]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_2_6:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_5]], i64 2
+; CHECK-NEXT:    [[TMP50:%.*]] = load i16, ptr [[ARRAYIDX_2_6]], align 2
 ; CHECK-NEXT:    [[CONV_2_6:%.*]] = zext i16 [[TMP50]] to i32
 ; CHECK-NEXT:    [[ADD_2_6:%.*]] = add nuw nsw i32 [[ADD_1_6]], [[CONV_2_6]]
 ; CHECK-NEXT:    [[MUL_2_6:%.*]] = mul nuw nsw i32 [[CONV_2_6]], [[CONV_2_6]]
 ; CHECK-NEXT:    [[ADD11_2_6:%.*]] = add i32 [[MUL_2_6]], [[ADD11_1_6]]
-; CHECK-NEXT:    [[ARRAYIDX_3_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 3
-; CHECK-NEXT:    [[TMP51:%.*]] = load i16, i16* [[ARRAYIDX_3_6]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_3_6:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_5]], i64 3
+; CHECK-NEXT:    [[TMP51:%.*]] = load i16, ptr [[ARRAYIDX_3_6]], align 2
 ; CHECK-NEXT:    [[CONV_3_6:%.*]] = zext i16 [[TMP51]] to i32
 ; CHECK-NEXT:    [[ADD_3_6:%.*]] = add nuw nsw i32 [[ADD_2_6]], [[CONV_3_6]]
 ; CHECK-NEXT:    [[MUL_3_6:%.*]] = mul nuw nsw i32 [[CONV_3_6]], [[CONV_3_6]]
 ; CHECK-NEXT:    [[ADD11_3_6:%.*]] = add i32 [[MUL_3_6]], [[ADD11_2_6]]
-; CHECK-NEXT:    [[ARRAYIDX_4_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 4
-; CHECK-NEXT:    [[TMP52:%.*]] = load i16, i16* [[ARRAYIDX_4_6]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_4_6:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_5]], i64 4
+; CHECK-NEXT:    [[TMP52:%.*]] = load i16, ptr [[ARRAYIDX_4_6]], align 2
 ; CHECK-NEXT:    [[CONV_4_6:%.*]] = zext i16 [[TMP52]] to i32
 ; CHECK-NEXT:    [[ADD_4_6:%.*]] = add nuw nsw i32 [[ADD_3_6]], [[CONV_4_6]]
 ; CHECK-NEXT:    [[MUL_4_6:%.*]] = mul nuw nsw i32 [[CONV_4_6]], [[CONV_4_6]]
 ; CHECK-NEXT:    [[ADD11_4_6:%.*]] = add i32 [[MUL_4_6]], [[ADD11_3_6]]
-; CHECK-NEXT:    [[ARRAYIDX_5_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 5
-; CHECK-NEXT:    [[TMP53:%.*]] = load i16, i16* [[ARRAYIDX_5_6]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_5_6:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_5]], i64 5
+; CHECK-NEXT:    [[TMP53:%.*]] = load i16, ptr [[ARRAYIDX_5_6]], align 2
 ; CHECK-NEXT:    [[CONV_5_6:%.*]] = zext i16 [[TMP53]] to i32
 ; CHECK-NEXT:    [[ADD_5_6:%.*]] = add nuw nsw i32 [[ADD_4_6]], [[CONV_5_6]]
 ; CHECK-NEXT:    [[MUL_5_6:%.*]] = mul nuw nsw i32 [[CONV_5_6]], [[CONV_5_6]]
 ; CHECK-NEXT:    [[ADD11_5_6:%.*]] = add i32 [[MUL_5_6]], [[ADD11_4_6]]
-; CHECK-NEXT:    [[ARRAYIDX_6_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 6
-; CHECK-NEXT:    [[TMP54:%.*]] = load i16, i16* [[ARRAYIDX_6_6]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_6_6:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_5]], i64 6
+; CHECK-NEXT:    [[TMP54:%.*]] = load i16, ptr [[ARRAYIDX_6_6]], align 2
 ; CHECK-NEXT:    [[CONV_6_6:%.*]] = zext i16 [[TMP54]] to i32
 ; CHECK-NEXT:    [[ADD_6_6:%.*]] = add nuw nsw i32 [[ADD_5_6]], [[CONV_6_6]]
 ; CHECK-NEXT:    [[MUL_6_6:%.*]] = mul nuw nsw i32 [[CONV_6_6]], [[CONV_6_6]]
 ; CHECK-NEXT:    [[ADD11_6_6:%.*]] = add i32 [[MUL_6_6]], [[ADD11_5_6]]
-; CHECK-NEXT:    [[ARRAYIDX_7_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 7
-; CHECK-NEXT:    [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX_7_6]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_7_6:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_5]], i64 7
+; CHECK-NEXT:    [[TMP55:%.*]] = load i16, ptr [[ARRAYIDX_7_6]], align 2
 ; CHECK-NEXT:    [[CONV_7_6:%.*]] = zext i16 [[TMP55]] to i32
 ; CHECK-NEXT:    [[ADD_7_6:%.*]] = add nuw nsw i32 [[ADD_6_6]], [[CONV_7_6]]
 ; CHECK-NEXT:    [[MUL_7_6:%.*]] = mul nuw nsw i32 [[CONV_7_6]], [[CONV_7_6]]
 ; CHECK-NEXT:    [[ADD11_7_6:%.*]] = add i32 [[MUL_7_6]], [[ADD11_6_6]]
-; CHECK-NEXT:    [[ADD_PTR_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[TMP56:%.*]] = load i16, i16* [[ADD_PTR_6]], align 2
+; CHECK-NEXT:    [[ADD_PTR_6:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_5]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[TMP56:%.*]] = load i16, ptr [[ADD_PTR_6]], align 2
 ; CHECK-NEXT:    [[CONV_764:%.*]] = zext i16 [[TMP56]] to i32
 ; CHECK-NEXT:    [[ADD_765:%.*]] = add nuw nsw i32 [[ADD_7_6]], [[CONV_764]]
 ; CHECK-NEXT:    [[MUL_766:%.*]] = mul nuw nsw i32 [[CONV_764]], [[CONV_764]]
 ; CHECK-NEXT:    [[ADD11_767:%.*]] = add i32 [[MUL_766]], [[ADD11_7_6]]
-; CHECK-NEXT:    [[ARRAYIDX_1_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 1
-; CHECK-NEXT:    [[TMP57:%.*]] = load i16, i16* [[ARRAYIDX_1_7]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_1_7:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_6]], i64 1
+; CHECK-NEXT:    [[TMP57:%.*]] = load i16, ptr [[ARRAYIDX_1_7]], align 2
 ; CHECK-NEXT:    [[CONV_1_7:%.*]] = zext i16 [[TMP57]] to i32
 ; CHECK-NEXT:    [[ADD_1_7:%.*]] = add nuw nsw i32 [[ADD_765]], [[CONV_1_7]]
 ; CHECK-NEXT:    [[MUL_1_7:%.*]] = mul nuw nsw i32 [[CONV_1_7]], [[CONV_1_7]]
 ; CHECK-NEXT:    [[ADD11_1_7:%.*]] = add i32 [[MUL_1_7]], [[ADD11_767]]
-; CHECK-NEXT:    [[ARRAYIDX_2_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 2
-; CHECK-NEXT:    [[TMP58:%.*]] = load i16, i16* [[ARRAYIDX_2_7]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_2_7:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_6]], i64 2
+; CHECK-NEXT:    [[TMP58:%.*]] = load i16, ptr [[ARRAYIDX_2_7]], align 2
 ; CHECK-NEXT:    [[CONV_2_7:%.*]] = zext i16 [[TMP58]] to i32
 ; CHECK-NEXT:    [[ADD_2_7:%.*]] = add nuw nsw i32 [[ADD_1_7]], [[CONV_2_7]]
 ; CHECK-NEXT:    [[MUL_2_7:%.*]] = mul nuw nsw i32 [[CONV_2_7]], [[CONV_2_7]]
 ; CHECK-NEXT:    [[ADD11_2_7:%.*]] = add i32 [[MUL_2_7]], [[ADD11_1_7]]
-; CHECK-NEXT:    [[ARRAYIDX_3_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 3
-; CHECK-NEXT:    [[TMP59:%.*]] = load i16, i16* [[ARRAYIDX_3_7]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_3_7:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_6]], i64 3
+; CHECK-NEXT:    [[TMP59:%.*]] = load i16, ptr [[ARRAYIDX_3_7]], align 2
 ; CHECK-NEXT:    [[CONV_3_7:%.*]] = zext i16 [[TMP59]] to i32
 ; CHECK-NEXT:    [[ADD_3_7:%.*]] = add nuw nsw i32 [[ADD_2_7]], [[CONV_3_7]]
 ; CHECK-NEXT:    [[MUL_3_7:%.*]] = mul nuw nsw i32 [[CONV_3_7]], [[CONV_3_7]]
 ; CHECK-NEXT:    [[ADD11_3_7:%.*]] = add i32 [[MUL_3_7]], [[ADD11_2_7]]
-; CHECK-NEXT:    [[ARRAYIDX_4_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 4
-; CHECK-NEXT:    [[TMP60:%.*]] = load i16, i16* [[ARRAYIDX_4_7]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_4_7:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_6]], i64 4
+; CHECK-NEXT:    [[TMP60:%.*]] = load i16, ptr [[ARRAYIDX_4_7]], align 2
 ; CHECK-NEXT:    [[CONV_4_7:%.*]] = zext i16 [[TMP60]] to i32
 ; CHECK-NEXT:    [[ADD_4_7:%.*]] = add nuw nsw i32 [[ADD_3_7]], [[CONV_4_7]]
 ; CHECK-NEXT:    [[MUL_4_7:%.*]] = mul nuw nsw i32 [[CONV_4_7]], [[CONV_4_7]]
 ; CHECK-NEXT:    [[ADD11_4_7:%.*]] = add i32 [[MUL_4_7]], [[ADD11_3_7]]
-; CHECK-NEXT:    [[ARRAYIDX_5_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 5
-; CHECK-NEXT:    [[TMP61:%.*]] = load i16, i16* [[ARRAYIDX_5_7]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_5_7:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_6]], i64 5
+; CHECK-NEXT:    [[TMP61:%.*]] = load i16, ptr [[ARRAYIDX_5_7]], align 2
 ; CHECK-NEXT:    [[CONV_5_7:%.*]] = zext i16 [[TMP61]] to i32
 ; CHECK-NEXT:    [[ADD_5_7:%.*]] = add nuw nsw i32 [[ADD_4_7]], [[CONV_5_7]]
 ; CHECK-NEXT:    [[MUL_5_7:%.*]] = mul nuw nsw i32 [[CONV_5_7]], [[CONV_5_7]]
 ; CHECK-NEXT:    [[ADD11_5_7:%.*]] = add i32 [[MUL_5_7]], [[ADD11_4_7]]
-; CHECK-NEXT:    [[ARRAYIDX_6_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 6
-; CHECK-NEXT:    [[TMP62:%.*]] = load i16, i16* [[ARRAYIDX_6_7]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_6_7:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_6]], i64 6
+; CHECK-NEXT:    [[TMP62:%.*]] = load i16, ptr [[ARRAYIDX_6_7]], align 2
 ; CHECK-NEXT:    [[CONV_6_7:%.*]] = zext i16 [[TMP62]] to i32
 ; CHECK-NEXT:    [[ADD_6_7:%.*]] = add nuw nsw i32 [[ADD_5_7]], [[CONV_6_7]]
 ; CHECK-NEXT:    [[MUL_6_7:%.*]] = mul nuw nsw i32 [[CONV_6_7]], [[CONV_6_7]]
 ; CHECK-NEXT:    [[ADD11_6_7:%.*]] = add i32 [[MUL_6_7]], [[ADD11_5_7]]
-; CHECK-NEXT:    [[ARRAYIDX_7_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 7
-; CHECK-NEXT:    [[TMP63:%.*]] = load i16, i16* [[ARRAYIDX_7_7]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_7_7:%.*]] = getelementptr inbounds i16, ptr [[ADD_PTR_6]], i64 7
+; CHECK-NEXT:    [[TMP63:%.*]] = load i16, ptr [[ARRAYIDX_7_7]], align 2
 ; CHECK-NEXT:    [[CONV_7_7:%.*]] = zext i16 [[TMP63]] to i32
 ; CHECK-NEXT:    [[ADD_7_7:%.*]] = add nuw nsw i32 [[ADD_6_7]], [[CONV_7_7]]
 ; CHECK-NEXT:    [[MUL_7_7:%.*]] = mul nuw nsw i32 [[CONV_7_7]], [[CONV_7_7]]
@@ -403,383 +403,383 @@ define i64 @straight(i16* nocapture noundef readonly %p, i32 noundef %st) {
 ;
 entry:
   %idx.ext = sext i32 %st to i64
-  %0 = load i16, i16* %p, align 2
+  %0 = load i16, ptr %p, align 2
   %conv = zext i16 %0 to i32
   %mul = mul nuw nsw i32 %conv, %conv
-  %arrayidx.1 = getelementptr inbounds i16, i16* %p, i64 1
-  %1 = load i16, i16* %arrayidx.1, align 2
+  %arrayidx.1 = getelementptr inbounds i16, ptr %p, i64 1
+  %1 = load i16, ptr %arrayidx.1, align 2
   %conv.1 = zext i16 %1 to i32
   %add.1 = add nuw nsw i32 %conv, %conv.1
   %mul.1 = mul nuw nsw i32 %conv.1, %conv.1
   %add11.1 = add nuw i32 %mul.1, %mul
-  %arrayidx.2 = getelementptr inbounds i16, i16* %p, i64 2
-  %2 = load i16, i16* %arrayidx.2, align 2
+  %arrayidx.2 = getelementptr inbounds i16, ptr %p, i64 2
+  %2 = load i16, ptr %arrayidx.2, align 2
   %conv.2 = zext i16 %2 to i32
   %add.2 = add nuw nsw i32 %add.1, %conv.2
   %mul.2 = mul nuw nsw i32 %conv.2, %conv.2
   %add11.2 = add i32 %mul.2, %add11.1
-  %arrayidx.3 = getelementptr inbounds i16, i16* %p, i64 3
-  %3 = load i16, i16* %arrayidx.3, align 2
+  %arrayidx.3 = getelementptr inbounds i16, ptr %p, i64 3
+  %3 = load i16, ptr %arrayidx.3, align 2
   %conv.3 = zext i16 %3 to i32
   %add.3 = add nuw nsw i32 %add.2, %conv.3
   %mul.3 = mul nuw nsw i32 %conv.3, %conv.3
   %add11.3 = add i32 %mul.3, %add11.2
-  %arrayidx.4 = getelementptr inbounds i16, i16* %p, i64 4
-  %4 = load i16, i16* %arrayidx.4, align 2
+  %arrayidx.4 = getelementptr inbounds i16, ptr %p, i64 4
+  %4 = load i16, ptr %arrayidx.4, align 2
   %conv.4 = zext i16 %4 to i32
   %add.4 = add nuw nsw i32 %add.3, %conv.4
   %mul.4 = mul nuw nsw i32 %conv.4, %conv.4
   %add11.4 = add i32 %mul.4, %add11.3
-  %arrayidx.5 = getelementptr inbounds i16, i16* %p, i64 5
-  %5 = load i16, i16* %arrayidx.5, align 2
+  %arrayidx.5 = getelementptr inbounds i16, ptr %p, i64 5
+  %5 = load i16, ptr %arrayidx.5, align 2
   %conv.5 = zext i16 %5 to i32
   %add.5 = add nuw nsw i32 %add.4, %conv.5
   %mul.5 = mul nuw nsw i32 %conv.5, %conv.5
   %add11.5 = add i32 %mul.5, %add11.4
-  %arrayidx.6 = getelementptr inbounds i16, i16* %p, i64 6
-  %6 = load i16, i16* %arrayidx.6, align 2
+  %arrayidx.6 = getelementptr inbounds i16, ptr %p, i64 6
+  %6 = load i16, ptr %arrayidx.6, align 2
   %conv.6 = zext i16 %6 to i32
   %add.6 = add nuw nsw i32 %add.5, %conv.6
   %mul.6 = mul nuw nsw i32 %conv.6, %conv.6
   %add11.6 = add i32 %mul.6, %add11.5
-  %arrayidx.7 = getelementptr inbounds i16, i16* %p, i64 7
-  %7 = load i16, i16* %arrayidx.7, align 2
+  %arrayidx.7 = getelementptr inbounds i16, ptr %p, i64 7
+  %7 = load i16, ptr %arrayidx.7, align 2
   %conv.7 = zext i16 %7 to i32
   %add.7 = add nuw nsw i32 %add.6, %conv.7
   %mul.7 = mul nuw nsw i32 %conv.7, %conv.7
   %add11.7 = add i32 %mul.7, %add11.6
-  %add.ptr = getelementptr inbounds i16, i16* %p, i64 %idx.ext
-  %8 = load i16, i16* %add.ptr, align 2
+  %add.ptr = getelementptr inbounds i16, ptr %p, i64 %idx.ext
+  %8 = load i16, ptr %add.ptr, align 2
   %conv.140 = zext i16 %8 to i32
   %add.141 = add nuw nsw i32 %add.7, %conv.140
   %mul.142 = mul nuw nsw i32 %conv.140, %conv.140
   %add11.143 = add i32 %mul.142, %add11.7
-  %arrayidx.1.1 = getelementptr inbounds i16, i16* %add.ptr, i64 1
-  %9 = load i16, i16* %arrayidx.1.1, align 2
+  %arrayidx.1.1 = getelementptr inbounds i16, ptr %add.ptr, i64 1
+  %9 = load i16, ptr %arrayidx.1.1, align 2
   %conv.1.1 = zext i16 %9 to i32
   %add.1.1 = add nuw nsw i32 %add.141, %conv.1.1
   %mul.1.1 = mul nuw nsw i32 %conv.1.1, %conv.1.1
   %add11.1.1 = add i32 %mul.1.1, %add11.143
-  %arrayidx.2.1 = getelementptr inbounds i16, i16* %add.ptr, i64 2
-  %10 = load i16, i16* %arrayidx.2.1, align 2
+  %arrayidx.2.1 = getelementptr inbounds i16, ptr %add.ptr, i64 2
+  %10 = load i16, ptr %arrayidx.2.1, align 2
   %conv.2.1 = zext i16 %10 to i32
   %add.2.1 = add nuw nsw i32 %add.1.1, %conv.2.1
   %mul.2.1 = mul nuw nsw i32 %conv.2.1, %conv.2.1
   %add11.2.1 = add i32 %mul.2.1, %add11.1.1
-  %arrayidx.3.1 = getelementptr inbounds i16, i16* %add.ptr, i64 3
-  %11 = load i16, i16* %arrayidx.3.1, align 2
+  %arrayidx.3.1 = getelementptr inbounds i16, ptr %add.ptr, i64 3
+  %11 = load i16, ptr %arrayidx.3.1, align 2
   %conv.3.1 = zext i16 %11 to i32
   %add.3.1 = add nuw nsw i32 %add.2.1, %conv.3.1
   %mul.3.1 = mul nuw nsw i32 %conv.3.1, %conv.3.1
   %add11.3.1 = add i32 %mul.3.1, %add11.2.1
-  %arrayidx.4.1 = getelementptr inbounds i16, i16* %add.ptr, i64 4
-  %12 = load i16, i16* %arrayidx.4.1, align 2
+  %arrayidx.4.1 = getelementptr inbounds i16, ptr %add.ptr, i64 4
+  %12 = load i16, ptr %arrayidx.4.1, align 2
   %conv.4.1 = zext i16 %12 to i32
   %add.4.1 = add nuw nsw i32 %add.3.1, %conv.4.1
   %mul.4.1 = mul nuw nsw i32 %conv.4.1, %conv.4.1
   %add11.4.1 = add i32 %mul.4.1, %add11.3.1
-  %arrayidx.5.1 = getelementptr inbounds i16, i16* %add.ptr, i64 5
-  %13 = load i16, i16* %arrayidx.5.1, align 2
+  %arrayidx.5.1 = getelementptr inbounds i16, ptr %add.ptr, i64 5
+  %13 = load i16, ptr %arrayidx.5.1, align 2
   %conv.5.1 = zext i16 %13 to i32
   %add.5.1 = add nuw nsw i32 %add.4.1, %conv.5.1
   %mul.5.1 = mul nuw nsw i32 %conv.5.1, %conv.5.1
   %add11.5.1 = add i32 %mul.5.1, %add11.4.1
-  %arrayidx.6.1 = getelementptr inbounds i16, i16* %add.ptr, i64 6
-  %14 = load i16, i16* %arrayidx.6.1, align 2
+  %arrayidx.6.1 = getelementptr inbounds i16, ptr %add.ptr, i64 6
+  %14 = load i16, ptr %arrayidx.6.1, align 2
   %conv.6.1 = zext i16 %14 to i32
   %add.6.1 = add nuw nsw i32 %add.5.1, %conv.6.1
   %mul.6.1 = mul nuw nsw i32 %conv.6.1, %conv.6.1
   %add11.6.1 = add i32 %mul.6.1, %add11.5.1
-  %arrayidx.7.1 = getelementptr inbounds i16, i16* %add.ptr, i64 7
-  %15 = load i16, i16* %arrayidx.7.1, align 2
+  %arrayidx.7.1 = getelementptr inbounds i16, ptr %add.ptr, i64 7
+  %15 = load i16, ptr %arrayidx.7.1, align 2
   %conv.7.1 = zext i16 %15 to i32
   %add.7.1 = add nuw nsw i32 %add.6.1, %conv.7.1
   %mul.7.1 = mul nuw nsw i32 %conv.7.1, %conv.7.1
   %add11.7.1 = add i32 %mul.7.1, %add11.6.1
-  %add.ptr.1 = getelementptr inbounds i16, i16* %add.ptr, i64 %idx.ext
-  %16 = load i16, i16* %add.ptr.1, align 2
+  %add.ptr.1 = getelementptr inbounds i16, ptr %add.ptr, i64 %idx.ext
+  %16 = load i16, ptr %add.ptr.1, align 2
   %conv.244 = zext i16 %16 to i32
   %add.245 = add nuw nsw i32 %add.7.1, %conv.244
   %mul.246 = mul nuw nsw i32 %conv.244, %conv.244
   %add11.247 = add i32 %mul.246, %add11.7.1
-  %arrayidx.1.2 = getelementptr inbounds i16, i16* %add.ptr.1, i64 1
-  %17 = load i16, i16* %arrayidx.1.2, align 2
+  %arrayidx.1.2 = getelementptr inbounds i16, ptr %add.ptr.1, i64 1
+  %17 = load i16, ptr %arrayidx.1.2, align 2
   %conv.1.2 = zext i16 %17 to i32
   %add.1.2 = add nuw nsw i32 %add.245, %conv.1.2
   %mul.1.2 = mul nuw nsw i32 %conv.1.2, %conv.1.2
   %add11.1.2 = add i32 %mul.1.2, %add11.247
-  %arrayidx.2.2 = getelementptr inbounds i16, i16* %add.ptr.1, i64 2
-  %18 = load i16, i16* %arrayidx.2.2, align 2
+  %arrayidx.2.2 = getelementptr inbounds i16, ptr %add.ptr.1, i64 2
+  %18 = load i16, ptr %arrayidx.2.2, align 2
   %conv.2.2 = zext i16 %18 to i32
   %add.2.2 = add nuw nsw i32 %add.1.2, %conv.2.2
   %mul.2.2 = mul nuw nsw i32 %conv.2.2, %conv.2.2
   %add11.2.2 = add i32 %mul.2.2, %add11.1.2
-  %arrayidx.3.2 = getelementptr inbounds i16, i16* %add.ptr.1, i64 3
-  %19 = load i16, i16* %arrayidx.3.2, align 2
+  %arrayidx.3.2 = getelementptr inbounds i16, ptr %add.ptr.1, i64 3
+  %19 = load i16, ptr %arrayidx.3.2, align 2
   %conv.3.2 = zext i16 %19 to i32
   %add.3.2 = add nuw nsw i32 %add.2.2, %conv.3.2
   %mul.3.2 = mul nuw nsw i32 %conv.3.2, %conv.3.2
   %add11.3.2 = add i32 %mul.3.2, %add11.2.2
-  %arrayidx.4.2 = getelementptr inbounds i16, i16* %add.ptr.1, i64 4
-  %20 = load i16, i16* %arrayidx.4.2, align 2
+  %arrayidx.4.2 = getelementptr inbounds i16, ptr %add.ptr.1, i64 4
+  %20 = load i16, ptr %arrayidx.4.2, align 2
   %conv.4.2 = zext i16 %20 to i32
   %add.4.2 = add nuw nsw i32 %add.3.2, %conv.4.2
   %mul.4.2 = mul nuw nsw i32 %conv.4.2, %conv.4.2
   %add11.4.2 = add i32 %mul.4.2, %add11.3.2
-  %arrayidx.5.2 = getelementptr inbounds i16, i16* %add.ptr.1, i64 5
-  %21 = load i16, i16* %arrayidx.5.2, align 2
+  %arrayidx.5.2 = getelementptr inbounds i16, ptr %add.ptr.1, i64 5
+  %21 = load i16, ptr %arrayidx.5.2, align 2
   %conv.5.2 = zext i16 %21 to i32
   %add.5.2 = add nuw nsw i32 %add.4.2, %conv.5.2
   %mul.5.2 = mul nuw nsw i32 %conv.5.2, %conv.5.2
   %add11.5.2 = add i32 %mul.5.2, %add11.4.2
-  %arrayidx.6.2 = getelementptr inbounds i16, i16* %add.ptr.1, i64 6
-  %22 = load i16, i16* %arrayidx.6.2, align 2
+  %arrayidx.6.2 = getelementptr inbounds i16, ptr %add.ptr.1, i64 6
+  %22 = load i16, ptr %arrayidx.6.2, align 2
   %conv.6.2 = zext i16 %22 to i32
   %add.6.2 = add nuw nsw i32 %add.5.2, %conv.6.2
   %mul.6.2 = mul nuw nsw i32 %conv.6.2, %conv.6.2
   %add11.6.2 = add i32 %mul.6.2, %add11.5.2
-  %arrayidx.7.2 = getelementptr inbounds i16, i16* %add.ptr.1, i64 7
-  %23 = load i16, i16* %arrayidx.7.2, align 2
+  %arrayidx.7.2 = getelementptr inbounds i16, ptr %add.ptr.1, i64 7
+  %23 = load i16, ptr %arrayidx.7.2, align 2
   %conv.7.2 = zext i16 %23 to i32
   %add.7.2 = add nuw nsw i32 %add.6.2, %conv.7.2
   %mul.7.2 = mul nuw nsw i32 %conv.7.2, %conv.7.2
   %add11.7.2 = add i32 %mul.7.2, %add11.6.2
-  %add.ptr.2 = getelementptr inbounds i16, i16* %add.ptr.1, i64 %idx.ext
-  %24 = load i16, i16* %add.ptr.2, align 2
+  %add.ptr.2 = getelementptr inbounds i16, ptr %add.ptr.1, i64 %idx.ext
+  %24 = load i16, ptr %add.ptr.2, align 2
   %conv.348 = zext i16 %24 to i32
   %add.349 = add nuw nsw i32 %add.7.2, %conv.348
   %mul.350 = mul nuw nsw i32 %conv.348, %conv.348
   %add11.351 = add i32 %mul.350, %add11.7.2
-  %arrayidx.1.3 = getelementptr inbounds i16, i16* %add.ptr.2, i64 1
-  %25 = load i16, i16* %arrayidx.1.3, align 2
+  %arrayidx.1.3 = getelementptr inbounds i16, ptr %add.ptr.2, i64 1
+  %25 = load i16, ptr %arrayidx.1.3, align 2
   %conv.1.3 = zext i16 %25 to i32
   %add.1.3 = add nuw nsw i32 %add.349, %conv.1.3
   %mul.1.3 = mul nuw nsw i32 %conv.1.3, %conv.1.3
   %add11.1.3 = add i32 %mul.1.3, %add11.351
-  %arrayidx.2.3 = getelementptr inbounds i16, i16* %add.ptr.2, i64 2
-  %26 = load i16, i16* %arrayidx.2.3, align 2
+  %arrayidx.2.3 = getelementptr inbounds i16, ptr %add.ptr.2, i64 2
+  %26 = load i16, ptr %arrayidx.2.3, align 2
   %conv.2.3 = zext i16 %26 to i32
   %add.2.3 = add nuw nsw i32 %add.1.3, %conv.2.3
   %mul.2.3 = mul nuw nsw i32 %conv.2.3, %conv.2.3
   %add11.2.3 = add i32 %mul.2.3, %add11.1.3
-  %arrayidx.3.3 = getelementptr inbounds i16, i16* %add.ptr.2, i64 3
-  %27 = load i16, i16* %arrayidx.3.3, align 2
+  %arrayidx.3.3 = getelementptr inbounds i16, ptr %add.ptr.2, i64 3
+  %27 = load i16, ptr %arrayidx.3.3, align 2
   %conv.3.3 = zext i16 %27 to i32
   %add.3.3 = add nuw nsw i32 %add.2.3, %conv.3.3
   %mul.3.3 = mul nuw nsw i32 %conv.3.3, %conv.3.3
   %add11.3.3 = add i32 %mul.3.3, %add11.2.3
-  %arrayidx.4.3 = getelementptr inbounds i16, i16* %add.ptr.2, i64 4
-  %28 = load i16, i16* %arrayidx.4.3, align 2
+  %arrayidx.4.3 = getelementptr inbounds i16, ptr %add.ptr.2, i64 4
+  %28 = load i16, ptr %arrayidx.4.3, align 2
   %conv.4.3 = zext i16 %28 to i32
   %add.4.3 = add nuw nsw i32 %add.3.3, %conv.4.3
   %mul.4.3 = mul nuw nsw i32 %conv.4.3, %conv.4.3
   %add11.4.3 = add i32 %mul.4.3, %add11.3.3
-  %arrayidx.5.3 = getelementptr inbounds i16, i16* %add.ptr.2, i64 5
-  %29 = load i16, i16* %arrayidx.5.3, align 2
+  %arrayidx.5.3 = getelementptr inbounds i16, ptr %add.ptr.2, i64 5
+  %29 = load i16, ptr %arrayidx.5.3, align 2
   %conv.5.3 = zext i16 %29 to i32
   %add.5.3 = add nuw nsw i32 %add.4.3, %conv.5.3
   %mul.5.3 = mul nuw nsw i32 %conv.5.3, %conv.5.3
   %add11.5.3 = add i32 %mul.5.3, %add11.4.3
-  %arrayidx.6.3 = getelementptr inbounds i16, i16* %add.ptr.2, i64 6
-  %30 = load i16, i16* %arrayidx.6.3, align 2
+  %arrayidx.6.3 = getelementptr inbounds i16, ptr %add.ptr.2, i64 6
+  %30 = load i16, ptr %arrayidx.6.3, align 2
   %conv.6.3 = zext i16 %30 to i32
   %add.6.3 = add nuw nsw i32 %add.5.3, %conv.6.3
   %mul.6.3 = mul nuw nsw i32 %conv.6.3, %conv.6.3
   %add11.6.3 = add i32 %mul.6.3, %add11.5.3
-  %arrayidx.7.3 = getelementptr inbounds i16, i16* %add.ptr.2, i64 7
-  %31 = load i16, i16* %arrayidx.7.3, align 2
+  %arrayidx.7.3 = getelementptr inbounds i16, ptr %add.ptr.2, i64 7
+  %31 = load i16, ptr %arrayidx.7.3, align 2
   %conv.7.3 = zext i16 %31 to i32
   %add.7.3 = add nuw nsw i32 %add.6.3, %conv.7.3
   %mul.7.3 = mul nuw nsw i32 %conv.7.3, %conv.7.3
   %add11.7.3 = add i32 %mul.7.3, %add11.6.3
-  %add.ptr.3 = getelementptr inbounds i16, i16* %add.ptr.2, i64 %idx.ext
-  %32 = load i16, i16* %add.ptr.3, align 2
+  %add.ptr.3 = getelementptr inbounds i16, ptr %add.ptr.2, i64 %idx.ext
+  %32 = load i16, ptr %add.ptr.3, align 2
   %conv.452 = zext i16 %32 to i32
   %add.453 = add nuw nsw i32 %add.7.3, %conv.452
   %mul.454 = mul nuw nsw i32 %conv.452, %conv.452
   %add11.455 = add i32 %mul.454, %add11.7.3
-  %arrayidx.1.4 = getelementptr inbounds i16, i16* %add.ptr.3, i64 1
-  %33 = load i16, i16* %arrayidx.1.4, align 2
+  %arrayidx.1.4 = getelementptr inbounds i16, ptr %add.ptr.3, i64 1
+  %33 = load i16, ptr %arrayidx.1.4, align 2
   %conv.1.4 = zext i16 %33 to i32
   %add.1.4 = add nuw nsw i32 %add.453, %conv.1.4
   %mul.1.4 = mul nuw nsw i32 %conv.1.4, %conv.1.4
   %add11.1.4 = add i32 %mul.1.4, %add11.455
-  %arrayidx.2.4 = getelementptr inbounds i16, i16* %add.ptr.3, i64 2
-  %34 = load i16, i16* %arrayidx.2.4, align 2
+  %arrayidx.2.4 = getelementptr inbounds i16, ptr %add.ptr.3, i64 2
+  %34 = load i16, ptr %arrayidx.2.4, align 2
   %conv.2.4 = zext i16 %34 to i32
   %add.2.4 = add nuw nsw i32 %add.1.4, %conv.2.4
   %mul.2.4 = mul nuw nsw i32 %conv.2.4, %conv.2.4
   %add11.2.4 = add i32 %mul.2.4, %add11.1.4
-  %arrayidx.3.4 = getelementptr inbounds i16, i16* %add.ptr.3, i64 3
-  %35 = load i16, i16* %arrayidx.3.4, align 2
+  %arrayidx.3.4 = getelementptr inbounds i16, ptr %add.ptr.3, i64 3
+  %35 = load i16, ptr %arrayidx.3.4, align 2
   %conv.3.4 = zext i16 %35 to i32
   %add.3.4 = add nuw nsw i32 %add.2.4, %conv.3.4
   %mul.3.4 = mul nuw nsw i32 %conv.3.4, %conv.3.4
   %add11.3.4 = add i32 %mul.3.4, %add11.2.4
-  %arrayidx.4.4 = getelementptr inbounds i16, i16* %add.ptr.3, i64 4
-  %36 = load i16, i16* %arrayidx.4.4, align 2
+  %arrayidx.4.4 = getelementptr inbounds i16, ptr %add.ptr.3, i64 4
+  %36 = load i16, ptr %arrayidx.4.4, align 2
   %conv.4.4 = zext i16 %36 to i32
   %add.4.4 = add nuw nsw i32 %add.3.4, %conv.4.4
   %mul.4.4 = mul nuw nsw i32 %conv.4.4, %conv.4.4
   %add11.4.4 = add i32 %mul.4.4, %add11.3.4
-  %arrayidx.5.4 = getelementptr inbounds i16, i16* %add.ptr.3, i64 5
-  %37 = load i16, i16* %arrayidx.5.4, align 2
+  %arrayidx.5.4 = getelementptr inbounds i16, ptr %add.ptr.3, i64 5
+  %37 = load i16, ptr %arrayidx.5.4, align 2
   %conv.5.4 = zext i16 %37 to i32
   %add.5.4 = add nuw nsw i32 %add.4.4, %conv.5.4
   %mul.5.4 = mul nuw nsw i32 %conv.5.4, %conv.5.4
   %add11.5.4 = add i32 %mul.5.4, %add11.4.4
-  %arrayidx.6.4 = getelementptr inbounds i16, i16* %add.ptr.3, i64 6
-  %38 = load i16, i16* %arrayidx.6.4, align 2
+  %arrayidx.6.4 = getelementptr inbounds i16, ptr %add.ptr.3, i64 6
+  %38 = load i16, ptr %arrayidx.6.4, align 2
   %conv.6.4 = zext i16 %38 to i32
   %add.6.4 = add nuw nsw i32 %add.5.4, %conv.6.4
   %mul.6.4 = mul nuw nsw i32 %conv.6.4, %conv.6.4
   %add11.6.4 = add i32 %mul.6.4, %add11.5.4
-  %arrayidx.7.4 = getelementptr inbounds i16, i16* %add.ptr.3, i64 7
-  %39 = load i16, i16* %arrayidx.7.4, align 2
+  %arrayidx.7.4 = getelementptr inbounds i16, ptr %add.ptr.3, i64 7
+  %39 = load i16, ptr %arrayidx.7.4, align 2
   %conv.7.4 = zext i16 %39 to i32
   %add.7.4 = add nuw nsw i32 %add.6.4, %conv.7.4
   %mul.7.4 = mul nuw nsw i32 %conv.7.4, %conv.7.4
   %add11.7.4 = add i32 %mul.7.4, %add11.6.4
-  %add.ptr.4 = getelementptr inbounds i16, i16* %add.ptr.3, i64 %idx.ext
-  %40 = load i16, i16* %add.ptr.4, align 2
+  %add.ptr.4 = getelementptr inbounds i16, ptr %add.ptr.3, i64 %idx.ext
+  %40 = load i16, ptr %add.ptr.4, align 2
   %conv.556 = zext i16 %40 to i32
   %add.557 = add nuw nsw i32 %add.7.4, %conv.556
   %mul.558 = mul nuw nsw i32 %conv.556, %conv.556
   %add11.559 = add i32 %mul.558, %add11.7.4
-  %arrayidx.1.5 = getelementptr inbounds i16, i16* %add.ptr.4, i64 1
-  %41 = load i16, i16* %arrayidx.1.5, align 2
+  %arrayidx.1.5 = getelementptr inbounds i16, ptr %add.ptr.4, i64 1
+  %41 = load i16, ptr %arrayidx.1.5, align 2
   %conv.1.5 = zext i16 %41 to i32
   %add.1.5 = add nuw nsw i32 %add.557, %conv.1.5
   %mul.1.5 = mul nuw nsw i32 %conv.1.5, %conv.1.5
   %add11.1.5 = add i32 %mul.1.5, %add11.559
-  %arrayidx.2.5 = getelementptr inbounds i16, i16* %add.ptr.4, i64 2
-  %42 = load i16, i16* %arrayidx.2.5, align 2
+  %arrayidx.2.5 = getelementptr inbounds i16, ptr %add.ptr.4, i64 2
+  %42 = load i16, ptr %arrayidx.2.5, align 2
   %conv.2.5 = zext i16 %42 to i32
   %add.2.5 = add nuw nsw i32 %add.1.5, %conv.2.5
   %mul.2.5 = mul nuw nsw i32 %conv.2.5, %conv.2.5
   %add11.2.5 = add i32 %mul.2.5, %add11.1.5
-  %arrayidx.3.5 = getelementptr inbounds i16, i16* %add.ptr.4, i64 3
-  %43 = load i16, i16* %arrayidx.3.5, align 2
+  %arrayidx.3.5 = getelementptr inbounds i16, ptr %add.ptr.4, i64 3
+  %43 = load i16, ptr %arrayidx.3.5, align 2
   %conv.3.5 = zext i16 %43 to i32
   %add.3.5 = add nuw nsw i32 %add.2.5, %conv.3.5
   %mul.3.5 = mul nuw nsw i32 %conv.3.5, %conv.3.5
   %add11.3.5 = add i32 %mul.3.5, %add11.2.5
-  %arrayidx.4.5 = getelementptr inbounds i16, i16* %add.ptr.4, i64 4
-  %44 = load i16, i16* %arrayidx.4.5, align 2
+  %arrayidx.4.5 = getelementptr inbounds i16, ptr %add.ptr.4, i64 4
+  %44 = load i16, ptr %arrayidx.4.5, align 2
   %conv.4.5 = zext i16 %44 to i32
   %add.4.5 = add nuw nsw i32 %add.3.5, %conv.4.5
   %mul.4.5 = mul nuw nsw i32 %conv.4.5, %conv.4.5
   %add11.4.5 = add i32 %mul.4.5, %add11.3.5
-  %arrayidx.5.5 = getelementptr inbounds i16, i16* %add.ptr.4, i64 5
-  %45 = load i16, i16* %arrayidx.5.5, align 2
+  %arrayidx.5.5 = getelementptr inbounds i16, ptr %add.ptr.4, i64 5
+  %45 = load i16, ptr %arrayidx.5.5, align 2
   %conv.5.5 = zext i16 %45 to i32
   %add.5.5 = add nuw nsw i32 %add.4.5, %conv.5.5
   %mul.5.5 = mul nuw nsw i32 %conv.5.5, %conv.5.5
   %add11.5.5 = add i32 %mul.5.5, %add11.4.5
-  %arrayidx.6.5 = getelementptr inbounds i16, i16* %add.ptr.4, i64 6
-  %46 = load i16, i16* %arrayidx.6.5, align 2
+  %arrayidx.6.5 = getelementptr inbounds i16, ptr %add.ptr.4, i64 6
+  %46 = load i16, ptr %arrayidx.6.5, align 2
   %conv.6.5 = zext i16 %46 to i32
   %add.6.5 = add nuw nsw i32 %add.5.5, %conv.6.5
   %mul.6.5 = mul nuw nsw i32 %conv.6.5, %conv.6.5
   %add11.6.5 = add i32 %mul.6.5, %add11.5.5
-  %arrayidx.7.5 = getelementptr inbounds i16, i16* %add.ptr.4, i64 7
-  %47 = load i16, i16* %arrayidx.7.5, align 2
+  %arrayidx.7.5 = getelementptr inbounds i16, ptr %add.ptr.4, i64 7
+  %47 = load i16, ptr %arrayidx.7.5, align 2
   %conv.7.5 = zext i16 %47 to i32
   %add.7.5 = add nuw nsw i32 %add.6.5, %conv.7.5
   %mul.7.5 = mul nuw nsw i32 %conv.7.5, %conv.7.5
   %add11.7.5 = add i32 %mul.7.5, %add11.6.5
-  %add.ptr.5 = getelementptr inbounds i16, i16* %add.ptr.4, i64 %idx.ext
-  %48 = load i16, i16* %add.ptr.5, align 2
+  %add.ptr.5 = getelementptr inbounds i16, ptr %add.ptr.4, i64 %idx.ext
+  %48 = load i16, ptr %add.ptr.5, align 2
   %conv.660 = zext i16 %48 to i32
   %add.661 = add nuw nsw i32 %add.7.5, %conv.660
   %mul.662 = mul nuw nsw i32 %conv.660, %conv.660
   %add11.663 = add i32 %mul.662, %add11.7.5
-  %arrayidx.1.6 = getelementptr inbounds i16, i16* %add.ptr.5, i64 1
-  %49 = load i16, i16* %arrayidx.1.6, align 2
+  %arrayidx.1.6 = getelementptr inbounds i16, ptr %add.ptr.5, i64 1
+  %49 = load i16, ptr %arrayidx.1.6, align 2
   %conv.1.6 = zext i16 %49 to i32
   %add.1.6 = add nuw nsw i32 %add.661, %conv.1.6
   %mul.1.6 = mul nuw nsw i32 %conv.1.6, %conv.1.6
   %add11.1.6 = add i32 %mul.1.6, %add11.663
-  %arrayidx.2.6 = getelementptr inbounds i16, i16* %add.ptr.5, i64 2
-  %50 = load i16, i16* %arrayidx.2.6, align 2
+  %arrayidx.2.6 = getelementptr inbounds i16, ptr %add.ptr.5, i64 2
+  %50 = load i16, ptr %arrayidx.2.6, align 2
   %conv.2.6 = zext i16 %50 to i32
   %add.2.6 = add nuw nsw i32 %add.1.6, %conv.2.6
   %mul.2.6 = mul nuw nsw i32 %conv.2.6, %conv.2.6
   %add11.2.6 = add i32 %mul.2.6, %add11.1.6
-  %arrayidx.3.6 = getelementptr inbounds i16, i16* %add.ptr.5, i64 3
-  %51 = load i16, i16* %arrayidx.3.6, align 2
+  %arrayidx.3.6 = getelementptr inbounds i16, ptr %add.ptr.5, i64 3
+  %51 = load i16, ptr %arrayidx.3.6, align 2
   %conv.3.6 = zext i16 %51 to i32
   %add.3.6 = add nuw nsw i32 %add.2.6, %conv.3.6
   %mul.3.6 = mul nuw nsw i32 %conv.3.6, %conv.3.6
   %add11.3.6 = add i32 %mul.3.6, %add11.2.6
-  %arrayidx.4.6 = getelementptr inbounds i16, i16* %add.ptr.5, i64 4
-  %52 = load i16, i16* %arrayidx.4.6, align 2
+  %arrayidx.4.6 = getelementptr inbounds i16, ptr %add.ptr.5, i64 4
+  %52 = load i16, ptr %arrayidx.4.6, align 2
   %conv.4.6 = zext i16 %52 to i32
   %add.4.6 = add nuw nsw i32 %add.3.6, %conv.4.6
   %mul.4.6 = mul nuw nsw i32 %conv.4.6, %conv.4.6
   %add11.4.6 = add i32 %mul.4.6, %add11.3.6
-  %arrayidx.5.6 = getelementptr inbounds i16, i16* %add.ptr.5, i64 5
-  %53 = load i16, i16* %arrayidx.5.6, align 2
+  %arrayidx.5.6 = getelementptr inbounds i16, ptr %add.ptr.5, i64 5
+  %53 = load i16, ptr %arrayidx.5.6, align 2
   %conv.5.6 = zext i16 %53 to i32
   %add.5.6 = add nuw nsw i32 %add.4.6, %conv.5.6
   %mul.5.6 = mul nuw nsw i32 %conv.5.6, %conv.5.6
   %add11.5.6 = add i32 %mul.5.6, %add11.4.6
-  %arrayidx.6.6 = getelementptr inbounds i16, i16* %add.ptr.5, i64 6
-  %54 = load i16, i16* %arrayidx.6.6, align 2
+  %arrayidx.6.6 = getelementptr inbounds i16, ptr %add.ptr.5, i64 6
+  %54 = load i16, ptr %arrayidx.6.6, align 2
   %conv.6.6 = zext i16 %54 to i32
   %add.6.6 = add nuw nsw i32 %add.5.6, %conv.6.6
   %mul.6.6 = mul nuw nsw i32 %conv.6.6, %conv.6.6
   %add11.6.6 = add i32 %mul.6.6, %add11.5.6
-  %arrayidx.7.6 = getelementptr inbounds i16, i16* %add.ptr.5, i64 7
-  %55 = load i16, i16* %arrayidx.7.6, align 2
+  %arrayidx.7.6 = getelementptr inbounds i16, ptr %add.ptr.5, i64 7
+  %55 = load i16, ptr %arrayidx.7.6, align 2
   %conv.7.6 = zext i16 %55 to i32
   %add.7.6 = add nuw nsw i32 %add.6.6, %conv.7.6
   %mul.7.6 = mul nuw nsw i32 %conv.7.6, %conv.7.6
   %add11.7.6 = add i32 %mul.7.6, %add11.6.6
-  %add.ptr.6 = getelementptr inbounds i16, i16* %add.ptr.5, i64 %idx.ext
-  %56 = load i16, i16* %add.ptr.6, align 2
+  %add.ptr.6 = getelementptr inbounds i16, ptr %add.ptr.5, i64 %idx.ext
+  %56 = load i16, ptr %add.ptr.6, align 2
   %conv.764 = zext i16 %56 to i32
   %add.765 = add nuw nsw i32 %add.7.6, %conv.764
   %mul.766 = mul nuw nsw i32 %conv.764, %conv.764
   %add11.767 = add i32 %mul.766, %add11.7.6
-  %arrayidx.1.7 = getelementptr inbounds i16, i16* %add.ptr.6, i64 1
-  %57 = load i16, i16* %arrayidx.1.7, align 2
+  %arrayidx.1.7 = getelementptr inbounds i16, ptr %add.ptr.6, i64 1
+  %57 = load i16, ptr %arrayidx.1.7, align 2
   %conv.1.7 = zext i16 %57 to i32
   %add.1.7 = add nuw nsw i32 %add.765, %conv.1.7
   %mul.1.7 = mul nuw nsw i32 %conv.1.7, %conv.1.7
   %add11.1.7 = add i32 %mul.1.7, %add11.767
-  %arrayidx.2.7 = getelementptr inbounds i16, i16* %add.ptr.6, i64 2
-  %58 = load i16, i16* %arrayidx.2.7, align 2
+  %arrayidx.2.7 = getelementptr inbounds i16, ptr %add.ptr.6, i64 2
+  %58 = load i16, ptr %arrayidx.2.7, align 2
   %conv.2.7 = zext i16 %58 to i32
   %add.2.7 = add nuw nsw i32 %add.1.7, %conv.2.7
   %mul.2.7 = mul nuw nsw i32 %conv.2.7, %conv.2.7
   %add11.2.7 = add i32 %mul.2.7, %add11.1.7
-  %arrayidx.3.7 = getelementptr inbounds i16, i16* %add.ptr.6, i64 3
-  %59 = load i16, i16* %arrayidx.3.7, align 2
+  %arrayidx.3.7 = getelementptr inbounds i16, ptr %add.ptr.6, i64 3
+  %59 = load i16, ptr %arrayidx.3.7, align 2
   %conv.3.7 = zext i16 %59 to i32
   %add.3.7 = add nuw nsw i32 %add.2.7, %conv.3.7
   %mul.3.7 = mul nuw nsw i32 %conv.3.7, %conv.3.7
   %add11.3.7 = add i32 %mul.3.7, %add11.2.7
-  %arrayidx.4.7 = getelementptr inbounds i16, i16* %add.ptr.6, i64 4
-  %60 = load i16, i16* %arrayidx.4.7, align 2
+  %arrayidx.4.7 = getelementptr inbounds i16, ptr %add.ptr.6, i64 4
+  %60 = load i16, ptr %arrayidx.4.7, align 2
   %conv.4.7 = zext i16 %60 to i32
   %add.4.7 = add nuw nsw i32 %add.3.7, %conv.4.7
   %mul.4.7 = mul nuw nsw i32 %conv.4.7, %conv.4.7
   %add11.4.7 = add i32 %mul.4.7, %add11.3.7
-  %arrayidx.5.7 = getelementptr inbounds i16, i16* %add.ptr.6, i64 5
-  %61 = load i16, i16* %arrayidx.5.7, align 2
+  %arrayidx.5.7 = getelementptr inbounds i16, ptr %add.ptr.6, i64 5
+  %61 = load i16, ptr %arrayidx.5.7, align 2
   %conv.5.7 = zext i16 %61 to i32
   %add.5.7 = add nuw nsw i32 %add.4.7, %conv.5.7
   %mul.5.7 = mul nuw nsw i32 %conv.5.7, %conv.5.7
   %add11.5.7 = add i32 %mul.5.7, %add11.4.7
-  %arrayidx.6.7 = getelementptr inbounds i16, i16* %add.ptr.6, i64 6
-  %62 = load i16, i16* %arrayidx.6.7, align 2
+  %arrayidx.6.7 = getelementptr inbounds i16, ptr %add.ptr.6, i64 6
+  %62 = load i16, ptr %arrayidx.6.7, align 2
   %conv.6.7 = zext i16 %62 to i32
   %add.6.7 = add nuw nsw i32 %add.5.7, %conv.6.7
   %mul.6.7 = mul nuw nsw i32 %conv.6.7, %conv.6.7
   %add11.6.7 = add i32 %mul.6.7, %add11.5.7
-  %arrayidx.7.7 = getelementptr inbounds i16, i16* %add.ptr.6, i64 7
-  %63 = load i16, i16* %arrayidx.7.7, align 2
+  %arrayidx.7.7 = getelementptr inbounds i16, ptr %add.ptr.6, i64 7
+  %63 = load i16, ptr %arrayidx.7.7, align 2
   %conv.7.7 = zext i16 %63 to i32
   %add.7.7 = add nuw nsw i32 %add.6.7, %conv.7.7
   %mul.7.7 = mul nuw nsw i32 %conv.7.7, %conv.7.7
@@ -791,7 +791,7 @@ entry:
   ret i64 %add17
 }
 
-define i64 @looped(i16* nocapture noundef readonly %p, i32 noundef %st) {
+define i64 @looped(ptr nocapture noundef readonly %p, i32 noundef %st) {
 ; CHECK-LABEL: @looped(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[ST:%.*]] to i64
@@ -800,103 +800,103 @@ define i64 @looped(i16* nocapture noundef readonly %p, i32 noundef %st) {
 ; CHECK-NEXT:    [[Y_038:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC13:%.*]], [[FOR_COND1_PREHEADER]] ]
 ; CHECK-NEXT:    [[SQ_037:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD11_15:%.*]], [[FOR_COND1_PREHEADER]] ]
 ; CHECK-NEXT:    [[SM_036:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_15:%.*]], [[FOR_COND1_PREHEADER]] ]
-; CHECK-NEXT:    [[P_ADDR_035:%.*]] = phi i16* [ [[P:%.*]], [[ENTRY]] ], [ [[ADD_PTR:%.*]], [[FOR_COND1_PREHEADER]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[P_ADDR_035]], align 2
+; CHECK-NEXT:    [[P_ADDR_035:%.*]] = phi ptr [ [[P:%.*]], [[ENTRY]] ], [ [[ADD_PTR:%.*]], [[FOR_COND1_PREHEADER]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[P_ADDR_035]], align 2
 ; CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[TMP0]] to i32
 ; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[SM_036]], [[CONV]]
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nuw nsw i32 [[CONV]], [[CONV]]
 ; CHECK-NEXT:    [[ADD11:%.*]] = add i32 [[MUL]], [[SQ_037]]
-; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX_1]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i16, ptr [[P_ADDR_035]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX_1]], align 2
 ; CHECK-NEXT:    [[CONV_1:%.*]] = zext i16 [[TMP1]] to i32
 ; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[ADD]], [[CONV_1]]
 ; CHECK-NEXT:    [[MUL_1:%.*]] = mul nuw nsw i32 [[CONV_1]], [[CONV_1]]
 ; CHECK-NEXT:    [[ADD11_1:%.*]] = add i32 [[MUL_1]], [[ADD11]]
-; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 2
-; CHECK-NEXT:    [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX_2]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i16, ptr [[P_ADDR_035]], i64 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX_2]], align 2
 ; CHECK-NEXT:    [[CONV_2:%.*]] = zext i16 [[TMP2]] to i32
 ; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 [[ADD_1]], [[CONV_2]]
 ; CHECK-NEXT:    [[MUL_2:%.*]] = mul nuw nsw i32 [[CONV_2]], [[CONV_2]]
 ; CHECK-NEXT:    [[ADD11_2:%.*]] = add i32 [[MUL_2]], [[ADD11_1]]
-; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 3
-; CHECK-NEXT:    [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX_3]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i16, ptr [[P_ADDR_035]], i64 3
+; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX_3]], align 2
 ; CHECK-NEXT:    [[CONV_3:%.*]] = zext i16 [[TMP3]] to i32
 ; CHECK-NEXT:    [[ADD_3:%.*]] = add i32 [[ADD_2]], [[CONV_3]]
 ; CHECK-NEXT:    [[MUL_3:%.*]] = mul nuw nsw i32 [[CONV_3]], [[CONV_3]]
 ; CHECK-NEXT:    [[ADD11_3:%.*]] = add i32 [[MUL_3]], [[ADD11_2]]
-; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 4
-; CHECK-NEXT:    [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX_4]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i16, ptr [[P_ADDR_035]], i64 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX_4]], align 2
 ; CHECK-NEXT:    [[CONV_4:%.*]] = zext i16 [[TMP4]] to i32
 ; CHECK-NEXT:    [[ADD_4:%.*]] = add i32 [[ADD_3]], [[CONV_4]]
 ; CHECK-NEXT:    [[MUL_4:%.*]] = mul nuw nsw i32 [[CONV_4]], [[CONV_4]]
 ; CHECK-NEXT:    [[ADD11_4:%.*]] = add i32 [[MUL_4]], [[ADD11_3]]
-; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 5
-; CHECK-NEXT:    [[TMP5:%.*]] = load i16, i16* [[ARRAYIDX_5]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i16, ptr [[P_ADDR_035]], i64 5
+; CHECK-NEXT:    [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX_5]], align 2
 ; CHECK-NEXT:    [[CONV_5:%.*]] = zext i16 [[TMP5]] to i32
 ; CHECK-NEXT:    [[ADD_5:%.*]] = add i32 [[ADD_4]], [[CONV_5]]
 ; CHECK-NEXT:    [[MUL_5:%.*]] = mul nuw nsw i32 [[CONV_5]], [[CONV_5]]
 ; CHECK-NEXT:    [[ADD11_5:%.*]] = add i32 [[MUL_5]], [[ADD11_4]]
-; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 6
-; CHECK-NEXT:    [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX_6]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i16, ptr [[P_ADDR_035]], i64 6
+; CHECK-NEXT:    [[TMP6:%.*]] = load i16, ptr [[ARRAYIDX_6]], align 2
 ; CHECK-NEXT:    [[CONV_6:%.*]] = zext i16 [[TMP6]] to i32
 ; CHECK-NEXT:    [[ADD_6:%.*]] = add i32 [[ADD_5]], [[CONV_6]]
 ; CHECK-NEXT:    [[MUL_6:%.*]] = mul nuw nsw i32 [[CONV_6]], [[CONV_6]]
 ; CHECK-NEXT:    [[ADD11_6:%.*]] = add i32 [[MUL_6]], [[ADD11_5]]
-; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 7
-; CHECK-NEXT:    [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX_7]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i16, ptr [[P_ADDR_035]], i64 7
+; CHECK-NEXT:    [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX_7]], align 2
 ; CHECK-NEXT:    [[CONV_7:%.*]] = zext i16 [[TMP7]] to i32
 ; CHECK-NEXT:    [[ADD_7:%.*]] = add i32 [[ADD_6]], [[CONV_7]]
 ; CHECK-NEXT:    [[MUL_7:%.*]] = mul nuw nsw i32 [[CONV_7]], [[CONV_7]]
 ; CHECK-NEXT:    [[ADD11_7:%.*]] = add i32 [[MUL_7]], [[ADD11_6]]
-; CHECK-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 8
-; CHECK-NEXT:    [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX_8]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds i16, ptr [[P_ADDR_035]], i64 8
+; CHECK-NEXT:    [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX_8]], align 2
 ; CHECK-NEXT:    [[CONV_8:%.*]] = zext i16 [[TMP8]] to i32
 ; CHECK-NEXT:    [[ADD_8:%.*]] = add i32 [[ADD_7]], [[CONV_8]]
 ; CHECK-NEXT:    [[MUL_8:%.*]] = mul nuw nsw i32 [[CONV_8]], [[CONV_8]]
 ; CHECK-NEXT:    [[ADD11_8:%.*]] = add i32 [[MUL_8]], [[ADD11_7]]
-; CHECK-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 9
-; CHECK-NEXT:    [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX_9]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds i16, ptr [[P_ADDR_035]], i64 9
+; CHECK-NEXT:    [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX_9]], align 2
 ; CHECK-NEXT:    [[CONV_9:%.*]] = zext i16 [[TMP9]] to i32
 ; CHECK-NEXT:    [[ADD_9:%.*]] = add i32 [[ADD_8]], [[CONV_9]]
 ; CHECK-NEXT:    [[MUL_9:%.*]] = mul nuw nsw i32 [[CONV_9]], [[CONV_9]]
 ; CHECK-NEXT:    [[ADD11_9:%.*]] = add i32 [[MUL_9]], [[ADD11_8]]
-; CHECK-NEXT:    [[ARRAYIDX_10:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 10
-; CHECK-NEXT:    [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX_10]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_10:%.*]] = getelementptr inbounds i16, ptr [[P_ADDR_035]], i64 10
+; CHECK-NEXT:    [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX_10]], align 2
 ; CHECK-NEXT:    [[CONV_10:%.*]] = zext i16 [[TMP10]] to i32
 ; CHECK-NEXT:    [[ADD_10:%.*]] = add i32 [[ADD_9]], [[CONV_10]]
 ; CHECK-NEXT:    [[MUL_10:%.*]] = mul nuw nsw i32 [[CONV_10]], [[CONV_10]]
 ; CHECK-NEXT:    [[ADD11_10:%.*]] = add i32 [[MUL_10]], [[ADD11_9]]
-; CHECK-NEXT:    [[ARRAYIDX_11:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 11
-; CHECK-NEXT:    [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX_11]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_11:%.*]] = getelementptr inbounds i16, ptr [[P_ADDR_035]], i64 11
+; CHECK-NEXT:    [[TMP11:%.*]] = load i16, ptr [[ARRAYIDX_11]], align 2
 ; CHECK-NEXT:    [[CONV_11:%.*]] = zext i16 [[TMP11]] to i32
 ; CHECK-NEXT:    [[ADD_11:%.*]] = add i32 [[ADD_10]], [[CONV_11]]
 ; CHECK-NEXT:    [[MUL_11:%.*]] = mul nuw nsw i32 [[CONV_11]], [[CONV_11]]
 ; CHECK-NEXT:    [[ADD11_11:%.*]] = add i32 [[MUL_11]], [[ADD11_10]]
-; CHECK-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 12
-; CHECK-NEXT:    [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX_12]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds i16, ptr [[P_ADDR_035]], i64 12
+; CHECK-NEXT:    [[TMP12:%.*]] = load i16, ptr [[ARRAYIDX_12]], align 2
 ; CHECK-NEXT:    [[CONV_12:%.*]] = zext i16 [[TMP12]] to i32
 ; CHECK-NEXT:    [[ADD_12:%.*]] = add i32 [[ADD_11]], [[CONV_12]]
 ; CHECK-NEXT:    [[MUL_12:%.*]] = mul nuw nsw i32 [[CONV_12]], [[CONV_12]]
 ; CHECK-NEXT:    [[ADD11_12:%.*]] = add i32 [[MUL_12]], [[ADD11_11]]
-; CHECK-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 13
-; CHECK-NEXT:    [[TMP13:%.*]] = load i16, i16* [[ARRAYIDX_13]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds i16, ptr [[P_ADDR_035]], i64 13
+; CHECK-NEXT:    [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX_13]], align 2
 ; CHECK-NEXT:    [[CONV_13:%.*]] = zext i16 [[TMP13]] to i32
 ; CHECK-NEXT:    [[ADD_13:%.*]] = add i32 [[ADD_12]], [[CONV_13]]
 ; CHECK-NEXT:    [[MUL_13:%.*]] = mul nuw nsw i32 [[CONV_13]], [[CONV_13]]
 ; CHECK-NEXT:    [[ADD11_13:%.*]] = add i32 [[MUL_13]], [[ADD11_12]]
-; CHECK-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 14
-; CHECK-NEXT:    [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX_14]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds i16, ptr [[P_ADDR_035]], i64 14
+; CHECK-NEXT:    [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX_14]], align 2
 ; CHECK-NEXT:    [[CONV_14:%.*]] = zext i16 [[TMP14]] to i32
 ; CHECK-NEXT:    [[ADD_14:%.*]] = add i32 [[ADD_13]], [[CONV_14]]
 ; CHECK-NEXT:    [[MUL_14:%.*]] = mul nuw nsw i32 [[CONV_14]], [[CONV_14]]
 ; CHECK-NEXT:    [[ADD11_14:%.*]] = add i32 [[MUL_14]], [[ADD11_13]]
-; CHECK-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 15
-; CHECK-NEXT:    [[TMP15:%.*]] = load i16, i16* [[ARRAYIDX_15]], align 2
+; CHECK-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds i16, ptr [[P_ADDR_035]], i64 15
+; CHECK-NEXT:    [[TMP15:%.*]] = load i16, ptr [[ARRAYIDX_15]], align 2
 ; CHECK-NEXT:    [[CONV_15:%.*]] = zext i16 [[TMP15]] to i32
 ; CHECK-NEXT:    [[ADD_15]] = add i32 [[ADD_14]], [[CONV_15]]
 ; CHECK-NEXT:    [[MUL_15:%.*]] = mul nuw nsw i32 [[CONV_15]], [[CONV_15]]
 ; CHECK-NEXT:    [[ADD11_15]] = add i32 [[MUL_15]], [[ADD11_14]]
-; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i16, ptr [[P_ADDR_035]], i64 [[IDX_EXT]]
 ; CHECK-NEXT:    [[INC13]] = add nuw nsw i32 [[Y_038]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC13]], 16
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
@@ -915,103 +915,103 @@ for.cond1.preheader:                              ; preds = %entry, %for.cond1.p
   %y.038 = phi i32 [ 0, %entry ], [ %inc13, %for.cond1.preheader ]
   %sq.037 = phi i32 [ 0, %entry ], [ %add11.15, %for.cond1.preheader ]
   %sm.036 = phi i32 [ 0, %entry ], [ %add.15, %for.cond1.preheader ]
-  %p.addr.035 = phi i16* [ %p, %entry ], [ %add.ptr, %for.cond1.preheader ]
-  %0 = load i16, i16* %p.addr.035, align 2
+  %p.addr.035 = phi ptr [ %p, %entry ], [ %add.ptr, %for.cond1.preheader ]
+  %0 = load i16, ptr %p.addr.035, align 2
   %conv = zext i16 %0 to i32
   %add = add i32 %sm.036, %conv
   %mul = mul nuw nsw i32 %conv, %conv
   %add11 = add i32 %mul, %sq.037
-  %arrayidx.1 = getelementptr inbounds i16, i16* %p.addr.035, i64 1
-  %1 = load i16, i16* %arrayidx.1, align 2
+  %arrayidx.1 = getelementptr inbounds i16, ptr %p.addr.035, i64 1
+  %1 = load i16, ptr %arrayidx.1, align 2
   %conv.1 = zext i16 %1 to i32
   %add.1 = add i32 %add, %conv.1
   %mul.1 = mul nuw nsw i32 %conv.1, %conv.1
   %add11.1 = add i32 %mul.1, %add11
-  %arrayidx.2 = getelementptr inbounds i16, i16* %p.addr.035, i64 2
-  %2 = load i16, i16* %arrayidx.2, align 2
+  %arrayidx.2 = getelementptr inbounds i16, ptr %p.addr.035, i64 2
+  %2 = load i16, ptr %arrayidx.2, align 2
   %conv.2 = zext i16 %2 to i32
   %add.2 = add i32 %add.1, %conv.2
   %mul.2 = mul nuw nsw i32 %conv.2, %conv.2
   %add11.2 = add i32 %mul.2, %add11.1
-  %arrayidx.3 = getelementptr inbounds i16, i16* %p.addr.035, i64 3
-  %3 = load i16, i16* %arrayidx.3, align 2
+  %arrayidx.3 = getelementptr inbounds i16, ptr %p.addr.035, i64 3
+  %3 = load i16, ptr %arrayidx.3, align 2
   %conv.3 = zext i16 %3 to i32
   %add.3 = add i32 %add.2, %conv.3
   %mul.3 = mul nuw nsw i32 %conv.3, %conv.3
   %add11.3 = add i32 %mul.3, %add11.2
-  %arrayidx.4 = getelementptr inbounds i16, i16* %p.addr.035, i64 4
-  %4 = load i16, i16* %arrayidx.4, align 2
+  %arrayidx.4 = getelementptr inbounds i16, ptr %p.addr.035, i64 4
+  %4 = load i16, ptr %arrayidx.4, align 2
   %conv.4 = zext i16 %4 to i32
   %add.4 = add i32 %add.3, %conv.4
   %mul.4 = mul nuw nsw i32 %conv.4, %conv.4
   %add11.4 = add i32 %mul.4, %add11.3
-  %arrayidx.5 = getelementptr inbounds i16, i16* %p.addr.035, i64 5
-  %5 = load i16, i16* %arrayidx.5, align 2
+  %arrayidx.5 = getelementptr inbounds i16, ptr %p.addr.035, i64 5
+  %5 = load i16, ptr %arrayidx.5, align 2
   %conv.5 = zext i16 %5 to i32
   %add.5 = add i32 %add.4, %conv.5
   %mul.5 = mul nuw nsw i32 %conv.5, %conv.5
   %add11.5 = add i32 %mul.5, %add11.4
-  %arrayidx.6 = getelementptr inbounds i16, i16* %p.addr.035, i64 6
-  %6 = load i16, i16* %arrayidx.6, align 2
+  %arrayidx.6 = getelementptr inbounds i16, ptr %p.addr.035, i64 6
+  %6 = load i16, ptr %arrayidx.6, align 2
   %conv.6 = zext i16 %6 to i32
   %add.6 = add i32 %add.5, %conv.6
   %mul.6 = mul nuw nsw i32 %conv.6, %conv.6
   %add11.6 = add i32 %mul.6, %add11.5
-  %arrayidx.7 = getelementptr inbounds i16, i16* %p.addr.035, i64 7
-  %7 = load i16, i16* %arrayidx.7, align 2
+  %arrayidx.7 = getelementptr inbounds i16, ptr %p.addr.035, i64 7
+  %7 = load i16, ptr %arrayidx.7, align 2
   %conv.7 = zext i16 %7 to i32
   %add.7 = add i32 %add.6, %conv.7
   %mul.7 = mul nuw nsw i32 %conv.7, %conv.7
   %add11.7 = add i32 %mul.7, %add11.6
-  %arrayidx.8 = getelementptr inbounds i16, i16* %p.addr.035, i64 8
-  %8 = load i16, i16* %arrayidx.8, align 2
+  %arrayidx.8 = getelementptr inbounds i16, ptr %p.addr.035, i64 8
+  %8 = load i16, ptr %arrayidx.8, align 2
   %conv.8 = zext i16 %8 to i32
   %add.8 = add i32 %add.7, %conv.8
   %mul.8 = mul nuw nsw i32 %conv.8, %conv.8
   %add11.8 = add i32 %mul.8, %add11.7
-  %arrayidx.9 = getelementptr inbounds i16, i16* %p.addr.035, i64 9
-  %9 = load i16, i16* %arrayidx.9, align 2
+  %arrayidx.9 = getelementptr inbounds i16, ptr %p.addr.035, i64 9
+  %9 = load i16, ptr %arrayidx.9, align 2
   %conv.9 = zext i16 %9 to i32
   %add.9 = add i32 %add.8, %conv.9
   %mul.9 = mul nuw nsw i32 %conv.9, %conv.9
   %add11.9 = add i32 %mul.9, %add11.8
-  %arrayidx.10 = getelementptr inbounds i16, i16* %p.addr.035, i64 10
-  %10 = load i16, i16* %arrayidx.10, align 2
+  %arrayidx.10 = getelementptr inbounds i16, ptr %p.addr.035, i64 10
+  %10 = load i16, ptr %arrayidx.10, align 2
   %conv.10 = zext i16 %10 to i32
   %add.10 = add i32 %add.9, %conv.10
   %mul.10 = mul nuw nsw i32 %conv.10, %conv.10
   %add11.10 = add i32 %mul.10, %add11.9
-  %arrayidx.11 = getelementptr inbounds i16, i16* %p.addr.035, i64 11
-  %11 = load i16, i16* %arrayidx.11, align 2
+  %arrayidx.11 = getelementptr inbounds i16, ptr %p.addr.035, i64 11
+  %11 = load i16, ptr %arrayidx.11, align 2
   %conv.11 = zext i16 %11 to i32
   %add.11 = add i32 %add.10, %conv.11
   %mul.11 = mul nuw nsw i32 %conv.11, %conv.11
   %add11.11 = add i32 %mul.11, %add11.10
-  %arrayidx.12 = getelementptr inbounds i16, i16* %p.addr.035, i64 12
-  %12 = load i16, i16* %arrayidx.12, align 2
+  %arrayidx.12 = getelementptr inbounds i16, ptr %p.addr.035, i64 12
+  %12 = load i16, ptr %arrayidx.12, align 2
   %conv.12 = zext i16 %12 to i32
   %add.12 = add i32 %add.11, %conv.12
   %mul.12 = mul nuw nsw i32 %conv.12, %conv.12
   %add11.12 = add i32 %mul.12, %add11.11
-  %arrayidx.13 = getelementptr inbounds i16, i16* %p.addr.035, i64 13
-  %13 = load i16, i16* %arrayidx.13, align 2
+  %arrayidx.13 = getelementptr inbounds i16, ptr %p.addr.035, i64 13
+  %13 = load i16, ptr %arrayidx.13, align 2
   %conv.13 = zext i16 %13 to i32
   %add.13 = add i32 %add.12, %conv.13
   %mul.13 = mul nuw nsw i32 %conv.13, %conv.13
   %add11.13 = add i32 %mul.13, %add11.12
-  %arrayidx.14 = getelementptr inbounds i16, i16* %p.addr.035, i64 14
-  %14 = load i16, i16* %arrayidx.14, align 2
+  %arrayidx.14 = getelementptr inbounds i16, ptr %p.addr.035, i64 14
+  %14 = load i16, ptr %arrayidx.14, align 2
   %conv.14 = zext i16 %14 to i32
   %add.14 = add i32 %add.13, %conv.14
   %mul.14 = mul nuw nsw i32 %conv.14, %conv.14
   %add11.14 = add i32 %mul.14, %add11.13
-  %arrayidx.15 = getelementptr inbounds i16, i16* %p.addr.035, i64 15
-  %15 = load i16, i16* %arrayidx.15, align 2
+  %arrayidx.15 = getelementptr inbounds i16, ptr %p.addr.035, i64 15
+  %15 = load i16, ptr %arrayidx.15, align 2
   %conv.15 = zext i16 %15 to i32
   %add.15 = add i32 %add.14, %conv.15
   %mul.15 = mul nuw nsw i32 %conv.15, %conv.15
   %add11.15 = add i32 %mul.15, %add11.14
-  %add.ptr = getelementptr inbounds i16, i16* %p.addr.035, i64 %idx.ext
+  %add.ptr = getelementptr inbounds i16, ptr %p.addr.035, i64 %idx.ext
   %inc13 = add nuw nsw i32 %y.038, 1
   %exitcond.not = icmp eq i32 %inc13, 16
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.cond1.preheader

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/nontemporal.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/nontemporal.ll
index ff28ca43c2f62..c53de872ebdcf 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/nontemporal.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/nontemporal.ll
@@ -3,36 +3,33 @@
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-ios5.0.0"
 
-define void @foo(float* noalias %a, float* noalias %b, float* noalias %c) {
+define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4, !nontemporal !0
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[C:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4, !nontemporal !0
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <4 x float> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast float* [[A:%.*]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 4, !nontemporal !0
+; CHECK-NEXT:    store <4 x float> [[TMP4]], ptr [[A:%.*]], align 4, !nontemporal !0
 ; CHECK-NEXT:    ret void
 ;
 entry:
 ; Check that we don't lose !nontemporal hint when vectorizing loads.
-  %b1 = load float, float* %b, align 4, !nontemporal !0
-  %arrayidx.1 = getelementptr inbounds float, float* %b, i64 1
-  %b2 = load float, float* %arrayidx.1, align 4, !nontemporal !0
-  %arrayidx.2 = getelementptr inbounds float, float* %b, i64 2
-  %b3 = load float, float* %arrayidx.2, align 4, !nontemporal !0
-  %arrayidx.3 = getelementptr inbounds float, float* %b, i64 3
-  %b4 = load float, float* %arrayidx.3, align 4, !nontemporal !0
+  %b1 = load float, ptr %b, align 4, !nontemporal !0
+  %arrayidx.1 = getelementptr inbounds float, ptr %b, i64 1
+  %b2 = load float, ptr %arrayidx.1, align 4, !nontemporal !0
+  %arrayidx.2 = getelementptr inbounds float, ptr %b, i64 2
+  %b3 = load float, ptr %arrayidx.2, align 4, !nontemporal !0
+  %arrayidx.3 = getelementptr inbounds float, ptr %b, i64 3
+  %b4 = load float, ptr %arrayidx.3, align 4, !nontemporal !0
 
 ; Check that we don't introduce !nontemporal hint when the original scalar loads didn't have it.
-  %c1 = load float, float* %c, align 4
-  %arrayidx2.1 = getelementptr inbounds float, float* %c, i64 1
-  %c2 = load float, float* %arrayidx2.1, align 4
-  %arrayidx2.2 = getelementptr inbounds float, float* %c, i64 2
-  %c3 = load float, float* %arrayidx2.2, align 4
-  %arrayidx2.3 = getelementptr inbounds float, float* %c, i64 3
-  %c4 = load float, float* %arrayidx2.3, align 4
+  %c1 = load float, ptr %c, align 4
+  %arrayidx2.1 = getelementptr inbounds float, ptr %c, i64 1
+  %c2 = load float, ptr %arrayidx2.1, align 4
+  %arrayidx2.2 = getelementptr inbounds float, ptr %c, i64 2
+  %c3 = load float, ptr %arrayidx2.2, align 4
+  %arrayidx2.3 = getelementptr inbounds float, ptr %c, i64 3
+  %c4 = load float, ptr %arrayidx2.3, align 4
 
   %a1 = fadd float %b1, %c1
   %a2 = fadd float %b2, %c2
@@ -40,46 +37,44 @@ entry:
   %a4 = fadd float %b4, %c4
 
 ; Check that we don't lose !nontemporal hint when vectorizing stores.
-  store float %a1, float* %a, align 4, !nontemporal !0
-  %arrayidx3.1 = getelementptr inbounds float, float* %a, i64 1
-  store float %a2, float* %arrayidx3.1, align 4, !nontemporal !0
-  %arrayidx3.2 = getelementptr inbounds float, float* %a, i64 2
-  store float %a3, float* %arrayidx3.2, align 4, !nontemporal !0
-  %arrayidx3.3 = getelementptr inbounds float, float* %a, i64 3
-  store float %a4, float* %arrayidx3.3, align 4, !nontemporal !0
+  store float %a1, ptr %a, align 4, !nontemporal !0
+  %arrayidx3.1 = getelementptr inbounds float, ptr %a, i64 1
+  store float %a2, ptr %arrayidx3.1, align 4, !nontemporal !0
+  %arrayidx3.2 = getelementptr inbounds float, ptr %a, i64 2
+  store float %a3, ptr %arrayidx3.2, align 4, !nontemporal !0
+  %arrayidx3.3 = getelementptr inbounds float, ptr %a, i64 3
+  store float %a4, ptr %arrayidx3.3, align 4, !nontemporal !0
 
   ret void
 }
 
-define void @foo2(float* noalias %a, float* noalias %b) {
+define void @foo2(ptr noalias %a, ptr noalias %b) {
 ; CHECK-LABEL: @foo2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[A:%.*]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
 ; Check that we don't mark vector load with !nontemporal attribute if some of
 ; the original scalar loads don't have it.
-  %b1 = load float, float* %b, align 4, !nontemporal !0
-  %arrayidx.1 = getelementptr inbounds float, float* %b, i64 1
-  %b2 = load float, float* %arrayidx.1, align 4
-  %arrayidx.2 = getelementptr inbounds float, float* %b, i64 2
-  %b3 = load float, float* %arrayidx.2, align 4
-  %arrayidx.3 = getelementptr inbounds float, float* %b, i64 3
-  %b4 = load float, float* %arrayidx.3, align 4, !nontemporal !0
+  %b1 = load float, ptr %b, align 4, !nontemporal !0
+  %arrayidx.1 = getelementptr inbounds float, ptr %b, i64 1
+  %b2 = load float, ptr %arrayidx.1, align 4
+  %arrayidx.2 = getelementptr inbounds float, ptr %b, i64 2
+  %b3 = load float, ptr %arrayidx.2, align 4
+  %arrayidx.3 = getelementptr inbounds float, ptr %b, i64 3
+  %b4 = load float, ptr %arrayidx.3, align 4, !nontemporal !0
 
 ; Check that we don't mark vector store with !nontemporal attribute if some of
 ; the original scalar stores don't have it.
-  store float %b1, float* %a, align 4, !nontemporal !0
-  %arrayidx3.1 = getelementptr inbounds float, float* %a, i64 1
-  store float %b2, float* %arrayidx3.1, align 4
-  %arrayidx3.2 = getelementptr inbounds float, float* %a, i64 2
-  store float %b3, float* %arrayidx3.2, align 4
-  %arrayidx3.3 = getelementptr inbounds float, float* %a, i64 3
-  store float %b4, float* %arrayidx3.3, align 4, !nontemporal !0
+  store float %b1, ptr %a, align 4, !nontemporal !0
+  %arrayidx3.1 = getelementptr inbounds float, ptr %a, i64 1
+  store float %b2, ptr %arrayidx3.1, align 4
+  %arrayidx3.2 = getelementptr inbounds float, ptr %a, i64 2
+  store float %b3, ptr %arrayidx3.2, align 4
+  %arrayidx3.3 = getelementptr inbounds float, ptr %a, i64 3
+  store float %b4, ptr %arrayidx3.3, align 4, !nontemporal !0
 
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/remarks.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/remarks.ll
index 90577f701afec..3decb7e42e24d 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/remarks.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/remarks.ll
@@ -1,17 +1,15 @@
 ; RUN: opt -S -passes=slp-vectorizer -mtriple=aarch64--linux-gnu -mcpu=generic -pass-remarks=slp-vectorizer -o /dev/null < %s 2>&1 | FileCheck %s
 
-define void @f(double* %r, double* %w) {
-  %r0 = getelementptr inbounds double, double* %r, i64 0
-  %r1 = getelementptr inbounds double, double* %r, i64 1
-  %f0 = load double, double* %r0
-  %f1 = load double, double* %r1
+define void @f(ptr %r, ptr %w) {
+  %r1 = getelementptr inbounds double, ptr %r, i64 1
+  %f0 = load double, ptr %r
+  %f1 = load double, ptr %r1
   %add0 = fadd double %f0, %f0
   %add1 = fadd double %f1, %f1
-  %w0 = getelementptr inbounds double, double* %w, i64 0
-  %w1 = getelementptr inbounds double, double* %w, i64 1
+  %w1 = getelementptr inbounds double, ptr %w, i64 1
 ; CHECK: remark: /tmp/s.c:5:10: Stores SLP vectorized with cost -4 and with tree size 3
-  store double %add0, double* %w0, !dbg !9
-  store double %add1, double* %w1
+  store double %add0, ptr %w, !dbg !9
+  store double %add1, ptr %w1
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll
index 9ce28b68d36aa..b20dd0c41e174 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reorder-fmuladd-crash.ll
@@ -17,9 +17,8 @@ define i32 @foo() {
 ; CHECK-NEXT:    i32 1, label [[SW_BB195:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       sw.bb:
-; CHECK-NEXT:    [[ARRAYIDX43:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 0
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX43]] to <4 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    [[ARRAYIDX43:%.*]] = getelementptr inbounds [4 x [2 x double]], ptr undef, i32 0, i64 1, i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr [[ARRAYIDX43]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], <double 0x7FF8000000000000, double 0x7FF8000000000000, double 0x7FF8000000000000, double 0x7FF8000000000000>
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> undef, <4 x double> zeroinitializer, <4 x double> [[TMP2]])
 ; CHECK-NEXT:    br label [[SW_EPILOG:%.*]]
@@ -56,14 +55,14 @@ for.end39:                                        ; preds = %for.cond15.1
   ]
 
 sw.bb:                                            ; preds = %for.end39
-  %arrayidx43 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 0
-  %0 = load double, double* %arrayidx43, align 8
-  %arrayidx45 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 0
-  %1 = load double, double* %arrayidx45, align 8
-  %arrayidx51 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 1
-  %2 = load double, double* %arrayidx51, align 8
-  %arrayidx58 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 1
-  %3 = load double, double* %arrayidx58, align 8
+  %arrayidx43 = getelementptr inbounds [4 x [2 x double]], ptr undef, i32 0, i64 1, i64 0
+  %0 = load double, ptr %arrayidx43, align 8
+  %arrayidx45 = getelementptr inbounds [4 x [2 x double]], ptr undef, i32 0, i64 2, i64 0
+  %1 = load double, ptr %arrayidx45, align 8
+  %arrayidx51 = getelementptr inbounds [4 x [2 x double]], ptr undef, i32 0, i64 2, i64 1
+  %2 = load double, ptr %arrayidx51, align 8
+  %arrayidx58 = getelementptr inbounds [4 x [2 x double]], ptr undef, i32 0, i64 1, i64 1
+  %3 = load double, ptr %arrayidx58, align 8
   %mul = fmul double undef, %conv2
   %mul109 = fmul double undef, %conv
   %mul143 = fmul double %0, %mul

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
index 442a42e63fefa..2bde20231c076 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
@@ -8,16 +8,16 @@ target triple = "aarch64-unknown-linux-gnu"
 
 define void @test() {
 ; CHECK-LABEL: @test(
-; CHECK-NEXT:    [[LOAD0:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
-; CHECK-NEXT:    [[LOAD1:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; CHECK-NEXT:    [[LOAD0:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; CHECK-NEXT:    [[LOAD1:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
 ; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 16 x i8> [[LOAD1]], [[LOAD0]]
-; CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> [[ADD]], <vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef)
+; CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[ADD]], ptr undef, i32 1, <vscale x 16 x i1> undef)
 ; CHECK-NEXT:    ret void
 ;
-  %load0 = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8> *undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
-  %load1 = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8> *undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+  %load0 = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+  %load1 = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
   %add = add <vscale x 16 x i8> %load1, %load0
-  tail call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> %add, <vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef)
+  tail call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> %add, ptr undef, i32 1, <vscale x 16 x i1> undef)
   ret void
 }
 
@@ -142,18 +142,18 @@ define void @sext_scalable_extractelement() {
 ; CHECK-LABEL: @sext_scalable_extractelement(
 ; CHECK-NEXT:    [[X0:%.*]] = extractelement <vscale x 2 x i32> undef, i32 undef
 ; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[X0]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, i64* undef, i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr undef, i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <vscale x 2 x i32> undef, i32 undef
 ; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, i64* undef, i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr undef, i64 [[TMP4]]
 ; CHECK-NEXT:    ret void
 ;
   %x0 = extractelement <vscale x 2 x i32> undef, i32 undef
   %1 = sext i32 %x0 to i64
-  %2 = getelementptr inbounds i64, i64* undef, i64 %1
+  %2 = getelementptr inbounds i64, ptr undef, i64 %1
   %3 = extractelement <vscale x 2 x i32> undef, i32 undef
   %4 = sext i32 %3 to i64
-  %5 = getelementptr inbounds i64, i64* undef, i64 %4
+  %5 = getelementptr inbounds i64, ptr undef, i64 %4
   ret void
 }
 
@@ -161,18 +161,18 @@ define void @zext_scalable_extractelement() {
 ; CHECK-LABEL: @zext_scalable_extractelement(
 ; CHECK-NEXT:    [[X0:%.*]] = extractelement <vscale x 2 x i32> undef, i32 undef
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[X0]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, i64* undef, i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr undef, i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <vscale x 2 x i32> undef, i32 undef
 ; CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, i64* undef, i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr undef, i64 [[TMP4]]
 ; CHECK-NEXT:    ret void
 ;
   %x0 = extractelement <vscale x 2 x i32> undef, i32 undef
   %1 = zext i32 %x0 to i64
-  %2 = getelementptr inbounds i64, i64* undef, i64 %1
+  %2 = getelementptr inbounds i64, ptr undef, i64 %1
   %3 = extractelement <vscale x 2 x i32> undef, i32 undef
   %4 = zext i32 %3 to i64
-  %5 = getelementptr inbounds i64, i64* undef, i64 %4
+  %5 = getelementptr inbounds i64, ptr undef, i64 %4
   ret void
 }
 
@@ -180,20 +180,20 @@ define void @trunc_scalable_extractelement() {
 ; CHECK-LABEL: @trunc_scalable_extractelement(
 ; CHECK-NEXT:    [[X0:%.*]] = extractelement <vscale x 2 x i64> undef, i32 undef
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X0]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* undef, i32 [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr undef, i32 [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <vscale x 2 x i64> undef, i32 undef
 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* undef, i32 [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr undef, i32 [[TMP4]]
 ; CHECK-NEXT:    ret void
 ;
   %x0 = extractelement <vscale x 2 x i64> undef, i32 undef
   %1 = trunc i64 %x0 to i32
-  %2 = getelementptr inbounds i32, i32* undef, i32 %1
+  %2 = getelementptr inbounds i32, ptr undef, i32 %1
   %3 = extractelement <vscale x 2 x i64> undef, i32 undef
   %4 = trunc i64 %3 to i32
-  %5 = getelementptr inbounds i32, i32* undef, i32 %4
+  %5 = getelementptr inbounds i32, ptr undef, i32 %4
   ret void
 }
 
-declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>, <vscale x 16 x i8>)
-declare void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>)
+declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr, i32 immarg, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8>, ptr, i32 immarg, <vscale x 16 x i1>)

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll
index 705b8177b0395..0c3c695a308cd 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll
@@ -3,48 +3,45 @@
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64--linux-gnu"
 
-define void @test1(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c) {
+define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c) {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[C:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = sdiv <4 x i32> [[TMP4]], <i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load i32, i32* %b, align 4
-  %1 = load i32, i32* %c, align 4
+  %0 = load i32, ptr %b, align 4
+  %1 = load i32, ptr %c, align 4
   %add = add nsw i32 %1, %0
   %div = sdiv i32 %add, 2
-  store i32 %div, i32* %a, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 1
-  %2 = load i32, i32* %arrayidx3, align 4
-  %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 1
-  %3 = load i32, i32* %arrayidx4, align 4
+  store i32 %div, ptr %a, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %b, i64 1
+  %2 = load i32, ptr %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %c, i64 1
+  %3 = load i32, ptr %arrayidx4, align 4
   %add5 = add nsw i32 %3, %2
   %div6 = sdiv i32 %add5, 2
-  %arrayidx7 = getelementptr inbounds i32, i32* %a, i64 1
-  store i32 %div6, i32* %arrayidx7, align 4
-  %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 2
-  %4 = load i32, i32* %arrayidx8, align 4
-  %arrayidx9 = getelementptr inbounds i32, i32* %c, i64 2
-  %5 = load i32, i32* %arrayidx9, align 4
+  %arrayidx7 = getelementptr inbounds i32, ptr %a, i64 1
+  store i32 %div6, ptr %arrayidx7, align 4
+  %arrayidx8 = getelementptr inbounds i32, ptr %b, i64 2
+  %4 = load i32, ptr %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %c, i64 2
+  %5 = load i32, ptr %arrayidx9, align 4
   %add10 = add nsw i32 %5, %4
   %div11 = sdiv i32 %add10, 2
-  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 2
-  store i32 %div11, i32* %arrayidx12, align 4
-  %arrayidx13 = getelementptr inbounds i32, i32* %b, i64 3
-  %6 = load i32, i32* %arrayidx13, align 4
-  %arrayidx14 = getelementptr inbounds i32, i32* %c, i64 3
-  %7 = load i32, i32* %arrayidx14, align 4
+  %arrayidx12 = getelementptr inbounds i32, ptr %a, i64 2
+  store i32 %div11, ptr %arrayidx12, align 4
+  %arrayidx13 = getelementptr inbounds i32, ptr %b, i64 3
+  %6 = load i32, ptr %arrayidx13, align 4
+  %arrayidx14 = getelementptr inbounds i32, ptr %c, i64 3
+  %7 = load i32, ptr %arrayidx14, align 4
   %add15 = add nsw i32 %7, %6
   %div16 = sdiv i32 %add15, 2
-  %arrayidx17 = getelementptr inbounds i32, i32* %a, i64 3
-  store i32 %div16, i32* %arrayidx17, align 4
+  %arrayidx17 = getelementptr inbounds i32, ptr %a, i64 3
+  store i32 %div16, ptr %arrayidx17, align 4
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-and-reduction.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-and-reduction.ll
index 5066dbcee9e3e..b931cb3060b9b 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-and-reduction.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-and-reduction.ll
@@ -3,67 +3,61 @@
 
 %struct.buf = type { [8 x i8] }
 
-define i8 @reduce_and(%struct.buf* %a, %struct.buf* %b) {
+define i8 @reduce_and(ptr %a, ptr %b) {
 ; CHECK-LABEL: @reduce_and(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], %struct.buf* [[A:%.*]], i64 0, i32 0, i64 0
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B:%.*]], i64 0, i32 0, i64 0
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[ARRAYIDX]] to <8 x i8>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[ARRAYIDX3]] to <8 x i8>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[TMP2]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A:%.*]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr [[B:%.*]], align 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor <8 x i8> [[TMP3]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> [[TMP4]])
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = and i8 [[TMP5]], 1
 ; CHECK-NEXT:    ret i8 [[OP_RDX]]
 ;
 entry:
-  %arrayidx = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 0
-  %0 = load i8, i8* %arrayidx, align 1
-  %arrayidx3 = getelementptr inbounds %struct.buf, %struct.buf* %b, i64 0, i32 0, i64 0
-  %1 = load i8, i8* %arrayidx3, align 1
+  %0 = load i8, ptr %a, align 1
+  %1 = load i8, ptr %b, align 1
   %xor12 = xor i8 %1, %0
   %and13 = and i8 %xor12, 1
-  %arrayidx.1 = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 1
-  %2 = load i8, i8* %arrayidx.1, align 1
-  %arrayidx3.1 = getelementptr inbounds %struct.buf, %struct.buf* %b, i64 0, i32 0, i64 1
-  %3 = load i8, i8* %arrayidx3.1, align 1
+  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
+  %2 = load i8, ptr %arrayidx.1, align 1
+  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
+  %3 = load i8, ptr %arrayidx3.1, align 1
   %xor12.1 = xor i8 %3, %2
   %and13.1 = and i8 %xor12.1, %and13
-  %arrayidx.2 = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 2
-  %4 = load i8, i8* %arrayidx.2, align 1
-  %arrayidx3.2 = getelementptr inbounds %struct.buf, %struct.buf* %b, i64 0, i32 0, i64 2
-  %5 = load i8, i8* %arrayidx3.2, align 1
+  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
+  %4 = load i8, ptr %arrayidx.2, align 1
+  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
+  %5 = load i8, ptr %arrayidx3.2, align 1
   %xor12.2 = xor i8 %5, %4
   %and13.2 = and i8 %xor12.2, %and13.1
-  %arrayidx.3 = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 3
-  %6 = load i8, i8* %arrayidx.3, align 1
-  %arrayidx3.3 = getelementptr inbounds %struct.buf, %struct.buf* %b, i64 0, i32 0, i64 3
-  %7 = load i8, i8* %arrayidx3.3, align 1
+  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
+  %6 = load i8, ptr %arrayidx.3, align 1
+  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
+  %7 = load i8, ptr %arrayidx3.3, align 1
   %xor12.3 = xor i8 %7, %6
   %and13.3 = and i8 %xor12.3, %and13.2
-  %arrayidx.4 = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 4
-  %8 = load i8, i8* %arrayidx.4, align 1
-  %arrayidx3.4 = getelementptr inbounds %struct.buf, %struct.buf* %b, i64 0, i32 0, i64 4
-  %9 = load i8, i8* %arrayidx3.4, align 1
+  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
+  %8 = load i8, ptr %arrayidx.4, align 1
+  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
+  %9 = load i8, ptr %arrayidx3.4, align 1
   %xor12.4 = xor i8 %9, %8
   %and13.4 = and i8 %xor12.4, %and13.3
-  %arrayidx.5 = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 5
-  %10 = load i8, i8* %arrayidx.5, align 1
-  %arrayidx3.5 = getelementptr inbounds %struct.buf, %struct.buf* %b, i64 0, i32 0, i64 5
-  %11 = load i8, i8* %arrayidx3.5, align 1
+  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
+  %10 = load i8, ptr %arrayidx.5, align 1
+  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
+  %11 = load i8, ptr %arrayidx3.5, align 1
   %xor12.5 = xor i8 %11, %10
   %and13.5 = and i8 %xor12.5, %and13.4
-  %arrayidx.6 = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 6
-  %12 = load i8, i8* %arrayidx.6, align 1
-  %arrayidx3.6 = getelementptr inbounds %struct.buf, %struct.buf* %b, i64 0, i32 0, i64 6
-  %13 = load i8, i8* %arrayidx3.6, align 1
+  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
+  %12 = load i8, ptr %arrayidx.6, align 1
+  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
+  %13 = load i8, ptr %arrayidx3.6, align 1
   %xor12.6 = xor i8 %13, %12
   %and13.6 = and i8 %xor12.6, %and13.5
-  %arrayidx.7 = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 7
-  %14 = load i8, i8* %arrayidx.7, align 1
-  %arrayidx3.7 = getelementptr inbounds %struct.buf, %struct.buf* %b, i64 0, i32 0, i64 7
-  %15 = load i8, i8* %arrayidx3.7, align 1
+  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
+  %14 = load i8, ptr %arrayidx.7, align 1
+  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
+  %15 = load i8, ptr %arrayidx3.7, align 1
   %xor12.7 = xor i8 %15, %14
   %and13.7 = and i8 %xor12.7, %and13.6
   ret i8 %and13.7

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-or-reduction.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-or-reduction.ll
index b2643095929de..5e82982b2b857 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-or-reduction.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-or-reduction.ll
@@ -3,66 +3,60 @@
 
 %struct.buf = type { [8 x i8] }
 
-define i8 @reduce_or(%struct.buf* %a, %struct.buf* %b) {
+define i8 @reduce_or(ptr %a, ptr %b) {
 ; CHECK-LABEL: @reduce_or(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], %struct.buf* [[A:%.*]], i64 0, i32 0, i64 0
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B:%.*]], i64 0, i32 0, i64 0
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[ARRAYIDX]] to <8 x i8>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[ARRAYIDX3]] to <8 x i8>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[TMP2]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A:%.*]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr [[B:%.*]], align 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor <8 x i8> [[TMP3]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP4]])
 ; CHECK-NEXT:    ret i8 [[TMP5]]
 ;
 
 entry:
-  %arrayidx = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 0
-  %0 = load i8, i8* %arrayidx, align 1
-  %arrayidx3 = getelementptr inbounds %struct.buf, %struct.buf* %b, i64 0, i32 0, i64 0
-  %1 = load i8, i8* %arrayidx3, align 1
+  %0 = load i8, ptr %a, align 1
+  %1 = load i8, ptr %b, align 1
   %xor12 = xor i8 %1, %0
-  %arrayidx.1 = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 1
-  %2 = load i8, i8* %arrayidx.1, align 1
-  %arrayidx3.1 = getelementptr inbounds %struct.buf, %struct.buf* %b, i64 0, i32 0, i64 1
-  %3 = load i8, i8* %arrayidx3.1, align 1
+  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
+  %2 = load i8, ptr %arrayidx.1, align 1
+  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
+  %3 = load i8, ptr %arrayidx3.1, align 1
   %xor12.1 = xor i8 %3, %2
   %or13.1 = or i8 %xor12.1, %xor12
-  %arrayidx.2 = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 2
-  %4 = load i8, i8* %arrayidx.2, align 1
-  %arrayidx3.2 = getelementptr inbounds %struct.buf, %struct.buf* %b, i64 0, i32 0, i64 2
-  %5 = load i8, i8* %arrayidx3.2, align 1
+  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
+  %4 = load i8, ptr %arrayidx.2, align 1
+  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
+  %5 = load i8, ptr %arrayidx3.2, align 1
   %xor12.2 = xor i8 %5, %4
   %or13.2 = or i8 %xor12.2, %or13.1
-  %arrayidx.3 = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 3
-  %6 = load i8, i8* %arrayidx.3, align 1
-  %arrayidx3.3 = getelementptr inbounds %struct.buf, %struct.buf* %b, i64 0, i32 0, i64 3
-  %7 = load i8, i8* %arrayidx3.3, align 1
+  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
+  %6 = load i8, ptr %arrayidx.3, align 1
+  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
+  %7 = load i8, ptr %arrayidx3.3, align 1
   %xor12.3 = xor i8 %7, %6
   %or13.3 = or i8 %xor12.3, %or13.2
-  %arrayidx.4 = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 4
-  %8 = load i8, i8* %arrayidx.4, align 1
-  %arrayidx3.4 = getelementptr inbounds %struct.buf, %struct.buf* %b, i64 0, i32 0, i64 4
-  %9 = load i8, i8* %arrayidx3.4, align 1
+  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
+  %8 = load i8, ptr %arrayidx.4, align 1
+  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
+  %9 = load i8, ptr %arrayidx3.4, align 1
   %xor12.4 = xor i8 %9, %8
   %or13.4 = or i8 %xor12.4, %or13.3
-  %arrayidx.5 = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 5
-  %10 = load i8, i8* %arrayidx.5, align 1
-  %arrayidx3.5 = getelementptr inbounds %struct.buf, %struct.buf* %b, i64 0, i32 0, i64 5
-  %11 = load i8, i8* %arrayidx3.5, align 1
+  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
+  %10 = load i8, ptr %arrayidx.5, align 1
+  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
+  %11 = load i8, ptr %arrayidx3.5, align 1
   %xor12.5 = xor i8 %11, %10
   %or13.5 = or i8 %xor12.5, %or13.4
-  %arrayidx.6 = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 6
-  %12 = load i8, i8* %arrayidx.6, align 1
-  %arrayidx3.6 = getelementptr inbounds %struct.buf, %struct.buf* %b, i64 0, i32 0, i64 6
-  %13 = load i8, i8* %arrayidx3.6, align 1
+  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
+  %12 = load i8, ptr %arrayidx.6, align 1
+  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
+  %13 = load i8, ptr %arrayidx3.6, align 1
   %xor12.6 = xor i8 %13, %12
   %or13.6 = or i8 %xor12.6, %or13.5
-  %arrayidx.7 = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 7
-  %14 = load i8, i8* %arrayidx.7, align 1
-  %arrayidx3.7 = getelementptr inbounds %struct.buf, %struct.buf* %b, i64 0, i32 0, i64 7
-  %15 = load i8, i8* %arrayidx3.7, align 1
+  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
+  %14 = load i8, ptr %arrayidx.7, align 1
+  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
+  %15 = load i8, ptr %arrayidx3.7, align 1
   %xor12.7 = xor i8 %15, %14
   %or13.7 = or i8 %xor12.7, %or13.6
   ret i8 %or13.7

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-xor-reduction.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-xor-reduction.ll
index ded11f358de5c..68f1fcd379e35 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-xor-reduction.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-xor-reduction.ll
@@ -3,66 +3,60 @@
 
 %struct.buf = type { [8 x i8] }
 
-define i8 @reduce_xor(%struct.buf* %a, %struct.buf* %b) {
+define i8 @reduce_xor(ptr %a, ptr %b) {
 ; CHECK-LABEL: @reduce_xor(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], %struct.buf* [[A:%.*]], i64 0, i32 0, i64 0
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], %struct.buf* [[B:%.*]], i64 0, i32 0, i64 0
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[ARRAYIDX]] to <8 x i8>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[ARRAYIDX3]] to <8 x i8>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[TMP2]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A:%.*]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr [[B:%.*]], align 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> [[TMP4]])
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = xor i8 [[TMP5]], 1
 ; CHECK-NEXT:    ret i8 [[OP_RDX]]
 ;
 entry:
-  %arrayidx = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 0
-  %0 = load i8, i8* %arrayidx, align 1
-  %arrayidx3 = getelementptr inbounds %struct.buf, %struct.buf* %b, i64 0, i32 0, i64 0
-  %1 = load i8, i8* %arrayidx3, align 1
+  %0 = load i8, ptr %a, align 1
+  %1 = load i8, ptr %b, align 1
   %and12 = and i8 %1, %0
-  %arrayidx.1 = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 1
-  %2 = load i8, i8* %arrayidx.1, align 1
-  %arrayidx3.1 = getelementptr inbounds %struct.buf, %struct.buf* %b, i64 0, i32 0, i64 1
-  %3 = load i8, i8* %arrayidx3.1, align 1
+  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
+  %2 = load i8, ptr %arrayidx.1, align 1
+  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
+  %3 = load i8, ptr %arrayidx3.1, align 1
   %and12.1 = and i8 %3, %2
   %4 = xor i8 %and12, %and12.1
-  %arrayidx.2 = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 2
-  %5 = load i8, i8* %arrayidx.2, align 1
-  %arrayidx3.2 = getelementptr inbounds %struct.buf, %struct.buf* %b, i64 0, i32 0, i64 2
-  %6 = load i8, i8* %arrayidx3.2, align 1
+  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
+  %5 = load i8, ptr %arrayidx.2, align 1
+  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
+  %6 = load i8, ptr %arrayidx3.2, align 1
   %and12.2 = and i8 %6, %5
   %7 = xor i8 %4, %and12.2
-  %arrayidx.3 = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 3
-  %8 = load i8, i8* %arrayidx.3, align 1
-  %arrayidx3.3 = getelementptr inbounds %struct.buf, %struct.buf* %b, i64 0, i32 0, i64 3
-  %9 = load i8, i8* %arrayidx3.3, align 1
+  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
+  %8 = load i8, ptr %arrayidx.3, align 1
+  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
+  %9 = load i8, ptr %arrayidx3.3, align 1
   %and12.3 = and i8 %9, %8
   %10 = xor i8 %7, %and12.3
-  %arrayidx.4 = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 4
-  %11 = load i8, i8* %arrayidx.4, align 1
-  %arrayidx3.4 = getelementptr inbounds %struct.buf, %struct.buf* %b, i64 0, i32 0, i64 4
-  %12 = load i8, i8* %arrayidx3.4, align 1
+  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
+  %11 = load i8, ptr %arrayidx.4, align 1
+  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
+  %12 = load i8, ptr %arrayidx3.4, align 1
   %and12.4 = and i8 %12, %11
   %13 = xor i8 %10, %and12.4
-  %arrayidx.5 = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 5
-  %14 = load i8, i8* %arrayidx.5, align 1
-  %arrayidx3.5 = getelementptr inbounds %struct.buf, %struct.buf* %b, i64 0, i32 0, i64 5
-  %15 = load i8, i8* %arrayidx3.5, align 1
+  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
+  %14 = load i8, ptr %arrayidx.5, align 1
+  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
+  %15 = load i8, ptr %arrayidx3.5, align 1
   %and12.5 = and i8 %15, %14
   %16 = xor i8 %13, %and12.5
-  %arrayidx.6 = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 6
-  %17 = load i8, i8* %arrayidx.6, align 1
-  %arrayidx3.6 = getelementptr inbounds %struct.buf, %struct.buf* %b, i64 0, i32 0, i64 6
-  %18 = load i8, i8* %arrayidx3.6, align 1
+  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
+  %17 = load i8, ptr %arrayidx.6, align 1
+  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
+  %18 = load i8, ptr %arrayidx3.6, align 1
   %and12.6 = and i8 %18, %17
   %19 = xor i8 %16, %and12.6
-  %arrayidx.7 = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 7
-  %20 = load i8, i8* %arrayidx.7, align 1
-  %arrayidx3.7 = getelementptr inbounds %struct.buf, %struct.buf* %b, i64 0, i32 0, i64 7
-  %21 = load i8, i8* %arrayidx3.7, align 1
+  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
+  %20 = load i8, ptr %arrayidx.7, align 1
+  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
+  %21 = load i8, ptr %arrayidx3.7, align 1
   %and12.7 = and i8 %21, %20
   %22 = xor i8 %19, %and12.7
   %xor13.7 = xor i8 %22, 1
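
A pattern worth noting throughout these conversions: a getelementptr whose indices are all zero (such as the dropped %arrayidx and %arrayidx3 above) computes the base pointer itself, so the converted tests simply load and store through the base pointer directly. A minimal before/after sketch, using hypothetical names rather than lines from any one test:

; typed-pointer form: the all-zero-index GEP only re-derives %a
%arrayidx = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 0
%v = load i8, i8* %arrayidx, align 1

; opaque-pointer form: the GEP folds away and the load uses %a directly
%v = load i8, ptr %a, align 1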

diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-order.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-order.ll
index f1c1c5baca9b1..aeff0be90044f 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-order.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-order.ll
@@ -5,24 +5,21 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-ios13.0.0"
 
 declare i1 @cond()
-declare i32* @get_ptr()
+declare ptr @get_ptr()
 
-define void @test(i64* %ptr, i64* noalias %res) {
+define void @test(ptr %ptr, ptr noalias %res) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[CALL_I_I:%.*]] = call i32* @get_ptr()
-; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr i32, i32* [[CALL_I_I]], i32 2
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[CALL_I_I]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 2
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[GEP_1]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[TMP2]], align 2
+; CHECK-NEXT:    [[CALL_I_I:%.*]] = call ptr @get_ptr()
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr i32, ptr [[CALL_I_I]], i32 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[CALL_I_I]], align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr [[GEP_1]], align 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw <2 x i64> [[TMP4]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[RES:%.*]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 8
+; CHECK-NEXT:    store <2 x i64> [[TMP6]], ptr [[RES:%.*]], align 8
 ; CHECK-NEXT:    [[C:%.*]] = call i1 @cond()
 ; CHECK-NEXT:    br i1 [[C]], label [[FOR_BODY]], label [[EXIT:%.*]]
 ; CHECK:       exit:
@@ -32,25 +29,25 @@ entry:
   br label %for.body
 
 for.body:                                  ; preds = %for.body, %entry
-  %call.i.i = call i32* @get_ptr()
-  %l.0.0 = load i32, i32* %call.i.i, align 2
-  %gep.1 = getelementptr i32, i32* %call.i.i, i32 2
-  %l.1.0 = load i32, i32* %gep.1, align 2
+  %call.i.i = call ptr @get_ptr()
+  %l.0.0 = load i32, ptr %call.i.i, align 2
+  %gep.1 = getelementptr i32, ptr %call.i.i, i32 2
+  %l.1.0 = load i32, ptr %gep.1, align 2
   %ext.0.0 = zext i32 %l.0.0 to i64
   %ext.1.0 = zext i32 %l.1.0 to i64
   %sub.1 = sub nsw i64 %ext.0.0, %ext.1.0
 
-  %gep.2 = getelementptr i32, i32* %call.i.i, i32 1
-  %l.0.1 = load i32, i32* %gep.2, align 2
-  %gep.3 = getelementptr i32, i32* %call.i.i, i32 3
-  %l.1.1 = load i32, i32* %gep.3, align 2
+  %gep.2 = getelementptr i32, ptr %call.i.i, i32 1
+  %l.0.1 = load i32, ptr %gep.2, align 2
+  %gep.3 = getelementptr i32, ptr %call.i.i, i32 3
+  %l.1.1 = load i32, ptr %gep.3, align 2
   %ext.0.1 = zext i32 %l.0.1 to i64
   %ext.1.1 = zext i32 %l.1.1 to i64
   %sub.2 = sub nsw i64 %ext.0.1, %ext.1.1
 
-  store i64 %sub.1, i64* %res
-  %res.1 = getelementptr i64, i64* %res, i64 1
-  store i64 %sub.2, i64* %res.1
+  store i64 %sub.1, ptr %res
+  %res.1 = getelementptr i64, ptr %res, i64 1
+  store i64 %sub.2, ptr %res.1
 
   %c = call i1 @cond()
   br i1 %c, label %for.body, label %exit

diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/splat-loads.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/splat-loads.ll
index d9f33c2b5b6f8..47347719d373a 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/splat-loads.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/splat-loads.ll
@@ -5,16 +5,13 @@ target triple = "aarch64--linux-gnu"
 
 ; This checks that we prefer splats rather than load vectors + shuffles.
 ; A load + broadcast can be done efficiently with a single `ld1r` instruction.
-define void @splat_loads_double(double *%array1, double *%array2, double *%ptrA, double *%ptrB) {
+define void @splat_loads_double(ptr %array1, ptr %array2, ptr %ptrA, ptr %ptrB) {
 ; CHECK-LABEL: @splat_loads_double(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP_1_0:%.*]] = getelementptr inbounds double, double* [[ARRAY1:%.*]], i64 0
-; CHECK-NEXT:    [[GEP_2_0:%.*]] = getelementptr inbounds double, double* [[ARRAY2:%.*]], i64 0
-; CHECK-NEXT:    [[GEP_2_1:%.*]] = getelementptr inbounds double, double* [[ARRAY2]], i64 1
-; CHECK-NEXT:    [[LD_2_0:%.*]] = load double, double* [[GEP_2_0]], align 8
-; CHECK-NEXT:    [[LD_2_1:%.*]] = load double, double* [[GEP_2_1]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    [[GEP_2_1:%.*]] = getelementptr inbounds double, ptr [[ARRAY2:%.*]], i64 1
+; CHECK-NEXT:    [[LD_2_0:%.*]] = load double, ptr [[ARRAY2]], align 8
+; CHECK-NEXT:    [[LD_2_1:%.*]] = load double, ptr [[GEP_2_1]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY1:%.*]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[LD_2_0]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
@@ -22,20 +19,17 @@ define void @splat_loads_double(double *%array1, double *%array2, double *%ptrA,
 ; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP6]], ptr [[ARRAY1]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %gep_1_0 = getelementptr inbounds double, double* %array1, i64 0
-  %gep_1_1 = getelementptr inbounds double, double* %array1, i64 1
-  %ld_1_0 = load double, double* %gep_1_0, align 8
-  %ld_1_1 = load double, double* %gep_1_1, align 8
+  %gep_1_1 = getelementptr inbounds double, ptr %array1, i64 1
+  %ld_1_0 = load double, ptr %array1, align 8
+  %ld_1_1 = load double, ptr %gep_1_1, align 8
 
-  %gep_2_0 = getelementptr inbounds double, double* %array2, i64 0
-  %gep_2_1 = getelementptr inbounds double, double* %array2, i64 1
-  %ld_2_0 = load double, double* %gep_2_0, align 8
-  %ld_2_1 = load double, double* %gep_2_1, align 8
+  %gep_2_1 = getelementptr inbounds double, ptr %array2, i64 1
+  %ld_2_0 = load double, ptr %array2, align 8
+  %ld_2_1 = load double, ptr %gep_2_1, align 8
 
   %mul0 = fmul double %ld_1_0, %ld_2_0
   %mul1 = fmul double %ld_1_1, %ld_2_0
@@ -46,22 +40,19 @@ entry:
   %add0 = fadd double %mul0, %mul2
   %add1 = fadd double %mul1, %mul3
 
-  store double %add0, double *%gep_1_0
-  store double %add1, double *%gep_1_1
+  store double %add0, ptr %array1
+  store double %add1, ptr %gep_1_1
   ret void
 }
 
 ; Same but with float instead of double
-define void @splat_loads_float(float *%array1, float *%array2, float *%ptrA, float *%ptrB) {
+define void @splat_loads_float(ptr %array1, ptr %array2, ptr %ptrA, ptr %ptrB) {
 ; CHECK-LABEL: @splat_loads_float(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP_1_0:%.*]] = getelementptr inbounds float, float* [[ARRAY1:%.*]], i64 0
-; CHECK-NEXT:    [[GEP_2_0:%.*]] = getelementptr inbounds float, float* [[ARRAY2:%.*]], i64 0
-; CHECK-NEXT:    [[GEP_2_1:%.*]] = getelementptr inbounds float, float* [[ARRAY2]], i64 1
-; CHECK-NEXT:    [[LD_2_0:%.*]] = load float, float* [[GEP_2_0]], align 8
-; CHECK-NEXT:    [[LD_2_1:%.*]] = load float, float* [[GEP_2_1]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[GEP_1_0]] to <2 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 8
+; CHECK-NEXT:    [[GEP_2_1:%.*]] = getelementptr inbounds float, ptr [[ARRAY2:%.*]], i64 1
+; CHECK-NEXT:    [[LD_2_0:%.*]] = load float, ptr [[ARRAY2]], align 8
+; CHECK-NEXT:    [[LD_2_1:%.*]] = load float, ptr [[GEP_2_1]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[ARRAY1:%.*]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> poison, float [[LD_2_0]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x float> [[TMP1]], [[SHUFFLE]]
@@ -69,20 +60,17 @@ define void @splat_loads_float(float *%array1, float *%array2, float *%ptrA, flo
 ; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x float> [[TMP1]], [[SHUFFLE1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = fadd <2 x float> [[TMP3]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast float* [[GEP_1_0]] to <2 x float>*
-; CHECK-NEXT:    store <2 x float> [[TMP6]], <2 x float>* [[TMP7]], align 4
+; CHECK-NEXT:    store <2 x float> [[TMP6]], ptr [[ARRAY1]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %gep_1_0 = getelementptr inbounds float, float* %array1, i64 0
-  %gep_1_1 = getelementptr inbounds float, float* %array1, i64 1
-  %ld_1_0 = load float, float* %gep_1_0, align 8
-  %ld_1_1 = load float, float* %gep_1_1, align 8
+  %gep_1_1 = getelementptr inbounds float, ptr %array1, i64 1
+  %ld_1_0 = load float, ptr %array1, align 8
+  %ld_1_1 = load float, ptr %gep_1_1, align 8
 
-  %gep_2_0 = getelementptr inbounds float, float* %array2, i64 0
-  %gep_2_1 = getelementptr inbounds float, float* %array2, i64 1
-  %ld_2_0 = load float, float* %gep_2_0, align 8
-  %ld_2_1 = load float, float* %gep_2_1, align 8
+  %gep_2_1 = getelementptr inbounds float, ptr %array2, i64 1
+  %ld_2_0 = load float, ptr %array2, align 8
+  %ld_2_1 = load float, ptr %gep_2_1, align 8
 
   %mul0 = fmul float %ld_1_0, %ld_2_0
   %mul1 = fmul float %ld_1_1, %ld_2_0
@@ -93,22 +81,19 @@ entry:
   %add0 = fadd float %mul0, %mul2
   %add1 = fadd float %mul1, %mul3
 
-  store float %add0, float *%gep_1_0
-  store float %add1, float *%gep_1_1
+  store float %add0, ptr %array1
+  store float %add1, ptr %gep_1_1
   ret void
 }
 
 ; Same but with i64
-define void @splat_loads_i64(i64 *%array1, i64 *%array2, i64 *%ptrA, i64 *%ptrB) {
+define void @splat_loads_i64(ptr %array1, ptr %array2, ptr %ptrA, ptr %ptrB) {
 ; CHECK-LABEL: @splat_loads_i64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP_1_0:%.*]] = getelementptr inbounds i64, i64* [[ARRAY1:%.*]], i64 0
-; CHECK-NEXT:    [[GEP_2_0:%.*]] = getelementptr inbounds i64, i64* [[ARRAY2:%.*]], i64 0
-; CHECK-NEXT:    [[GEP_2_1:%.*]] = getelementptr inbounds i64, i64* [[ARRAY2]], i64 1
-; CHECK-NEXT:    [[LD_2_0:%.*]] = load i64, i64* [[GEP_2_0]], align 8
-; CHECK-NEXT:    [[LD_2_1:%.*]] = load i64, i64* [[GEP_2_1]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[GEP_1_0]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
+; CHECK-NEXT:    [[GEP_2_1:%.*]] = getelementptr inbounds i64, ptr [[ARRAY2:%.*]], i64 1
+; CHECK-NEXT:    [[LD_2_0:%.*]] = load i64, ptr [[ARRAY2]], align 8
+; CHECK-NEXT:    [[LD_2_1:%.*]] = load i64, ptr [[GEP_2_1]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[ARRAY1:%.*]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[LD_2_0]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[SHUFFLE]]
@@ -116,20 +101,17 @@ define void @splat_loads_i64(i64 *%array1, i64 *%array2, i64 *%ptrA, i64 *%ptrB)
 ; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = or <2 x i64> [[TMP1]], [[SHUFFLE1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP3]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[GEP_1_0]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
+; CHECK-NEXT:    store <2 x i64> [[TMP6]], ptr [[ARRAY1]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %gep_1_0 = getelementptr inbounds i64, i64* %array1, i64 0
-  %gep_1_1 = getelementptr inbounds i64, i64* %array1, i64 1
-  %ld_1_0 = load i64, i64* %gep_1_0, align 8
-  %ld_1_1 = load i64, i64* %gep_1_1, align 8
+  %gep_1_1 = getelementptr inbounds i64, ptr %array1, i64 1
+  %ld_1_0 = load i64, ptr %array1, align 8
+  %ld_1_1 = load i64, ptr %gep_1_1, align 8
 
-  %gep_2_0 = getelementptr inbounds i64, i64* %array2, i64 0
-  %gep_2_1 = getelementptr inbounds i64, i64* %array2, i64 1
-  %ld_2_0 = load i64, i64* %gep_2_0, align 8
-  %ld_2_1 = load i64, i64* %gep_2_1, align 8
+  %gep_2_1 = getelementptr inbounds i64, ptr %array2, i64 1
+  %ld_2_0 = load i64, ptr %array2, align 8
+  %ld_2_1 = load i64, ptr %gep_2_1, align 8
 
   %or0 = or i64 %ld_1_0, %ld_2_0
   %or1 = or i64 %ld_1_1, %ld_2_0
@@ -140,22 +122,19 @@ entry:
   %add0 = add i64 %or0, %or2
   %add1 = add i64 %or1, %or3
 
-  store i64 %add0, i64 *%gep_1_0
-  store i64 %add1, i64 *%gep_1_1
+  store i64 %add0, ptr %array1
+  store i64 %add1, ptr %gep_1_1
   ret void
 }
 
 ; Same but with i32
-define void @splat_loads_i32(i32 *%array1, i32 *%array2, i32 *%ptrA, i32 *%ptrB) {
+define void @splat_loads_i32(ptr %array1, ptr %array2, ptr %ptrA, ptr %ptrB) {
 ; CHECK-LABEL: @splat_loads_i32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP_1_0:%.*]] = getelementptr inbounds i32, i32* [[ARRAY1:%.*]], i64 0
-; CHECK-NEXT:    [[GEP_2_0:%.*]] = getelementptr inbounds i32, i32* [[ARRAY2:%.*]], i64 0
-; CHECK-NEXT:    [[GEP_2_1:%.*]] = getelementptr inbounds i32, i32* [[ARRAY2]], i64 1
-; CHECK-NEXT:    [[LD_2_0:%.*]] = load i32, i32* [[GEP_2_0]], align 8
-; CHECK-NEXT:    [[LD_2_1:%.*]] = load i32, i32* [[GEP_2_1]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[GEP_1_0]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8
+; CHECK-NEXT:    [[GEP_2_1:%.*]] = getelementptr inbounds i32, ptr [[ARRAY2:%.*]], i64 1
+; CHECK-NEXT:    [[LD_2_0:%.*]] = load i32, ptr [[ARRAY2]], align 8
+; CHECK-NEXT:    [[LD_2_1:%.*]] = load i32, ptr [[GEP_2_1]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[ARRAY1:%.*]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> poison, i32 [[LD_2_0]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i32> [[TMP1]], [[SHUFFLE]]
@@ -163,20 +142,17 @@ define void @splat_loads_i32(i32 *%array1, i32 *%array2, i32 *%ptrA, i32 *%ptrB)
 ; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = or <2 x i32> [[TMP1]], [[SHUFFLE1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i32> [[TMP3]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[GEP_1_0]] to <2 x i32>*
-; CHECK-NEXT:    store <2 x i32> [[TMP6]], <2 x i32>* [[TMP7]], align 4
+; CHECK-NEXT:    store <2 x i32> [[TMP6]], ptr [[ARRAY1]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %gep_1_0 = getelementptr inbounds i32, i32* %array1, i64 0
-  %gep_1_1 = getelementptr inbounds i32, i32* %array1, i64 1
-  %ld_1_0 = load i32, i32* %gep_1_0, align 8
-  %ld_1_1 = load i32, i32* %gep_1_1, align 8
+  %gep_1_1 = getelementptr inbounds i32, ptr %array1, i64 1
+  %ld_1_0 = load i32, ptr %array1, align 8
+  %ld_1_1 = load i32, ptr %gep_1_1, align 8
 
-  %gep_2_0 = getelementptr inbounds i32, i32* %array2, i64 0
-  %gep_2_1 = getelementptr inbounds i32, i32* %array2, i64 1
-  %ld_2_0 = load i32, i32* %gep_2_0, align 8
-  %ld_2_1 = load i32, i32* %gep_2_1, align 8
+  %gep_2_1 = getelementptr inbounds i32, ptr %array2, i64 1
+  %ld_2_0 = load i32, ptr %array2, align 8
+  %ld_2_1 = load i32, ptr %gep_2_1, align 8
 
   %or0 = or i32 %ld_1_0, %ld_2_0
   %or1 = or i32 %ld_1_1, %ld_2_0
@@ -187,7 +163,7 @@ entry:
   %add0 = add i32 %or0, %or2
   %add1 = add i32 %or1, %or3
 
-  store i32 %add0, i32 *%gep_1_0
-  store i32 %add1, i32 *%gep_1_1
+  store i32 %add0, ptr %array1
+  store i32 %add1, ptr %gep_1_1
   ret void
 }
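
The comment at the top of splat-loads.ll explains why these functions matter: a scalar load broadcast to all lanes can lower to a single ld1r on AArch64, so splats should be preferred over load vectors plus shuffles. The load-plus-broadcast shape in opaque-pointer IR, as a hypothetical reduced sketch (not taken from the test file):

; load one double and splat it into both lanes; AArch64 can lower
; this pattern to a single ld1r instruction
define <2 x double> @load_splat(ptr %p) {
  %v = load double, ptr %p, align 8
  %ins = insertelement <2 x double> poison, double %v, i32 0
  %splat = shufflevector <2 x double> %ins, <2 x double> poison, <2 x i32> zeroinitializer
  ret <2 x double> %splat
}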

diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
index c39ba672bd6dc..8b28762cce87a 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
@@ -28,39 +28,33 @@ define <2 x i64> @build_vec_v2i64(<2 x i64> %v0, <2 x i64> %v1) {
   ret <2 x i64> %tmp3.1
 }
 
-define void @store_chain_v2i64(i64* %a, i64* %b, i64* %c) {
+define void @store_chain_v2i64(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @store_chain_v2i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    [[TMP9:%.*]] = add <2 x i64> [[TMP8]], [[TMP7]]
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* [[TMP10]], align 8
+; CHECK-NEXT:    store <2 x i64> [[TMP9]], ptr [[C:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %a.0 = getelementptr i64, i64* %a, i64 0
-  %a.1 = getelementptr i64, i64* %a, i64 1
-  %b.0 = getelementptr i64, i64* %b, i64 0
-  %b.1 = getelementptr i64, i64* %b, i64 1
-  %c.0 = getelementptr i64, i64* %c, i64 0
-  %c.1 = getelementptr i64, i64* %c, i64 1
-  %v0.0 = load i64, i64* %a.0, align 8
-  %v0.1 = load i64, i64* %a.1, align 8
-  %v1.0 = load i64, i64* %b.0, align 8
-  %v1.1 = load i64, i64* %b.1, align 8
+  %a.1 = getelementptr i64, ptr %a, i64 1
+  %b.1 = getelementptr i64, ptr %b, i64 1
+  %c.1 = getelementptr i64, ptr %c, i64 1
+  %v0.0 = load i64, ptr %a, align 8
+  %v0.1 = load i64, ptr %a.1, align 8
+  %v1.0 = load i64, ptr %b, align 8
+  %v1.1 = load i64, ptr %b.1, align 8
   %tmp0.0 = add i64 %v0.0, %v1.0
   %tmp0.1 = add i64 %v0.1, %v1.1
   %tmp1.0 = sub i64 %v0.0, %v1.0
   %tmp1.1 = sub i64 %v0.1, %v1.1
   %tmp2.0 = add i64 %tmp0.0, %tmp0.1
   %tmp2.1 = add i64 %tmp1.0, %tmp1.1
-  store i64 %tmp2.0, i64* %c.0, align 8
-  store i64 %tmp2.1, i64* %c.1, align 8
+  store i64 %tmp2.0, ptr %c, align 8
+  store i64 %tmp2.1, ptr %c.1, align 8
   ret void
 }
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
index e3cb9dd30d96c..125b209de1e3e 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
@@ -28,39 +28,33 @@ define <2 x i64> @build_vec_v2i64(<2 x i64> %v0, <2 x i64> %v1) {
   ret <2 x i64> %tmp3.1
 }
 
-define void @store_chain_v2i64(i64* %a, i64* %b, i64* %c) {
+define void @store_chain_v2i64(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @store_chain_v2i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    [[TMP9:%.*]] = add <2 x i64> [[TMP8]], [[TMP7]]
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* [[TMP10]], align 8
+; CHECK-NEXT:    store <2 x i64> [[TMP9]], ptr [[C:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %a.0 = getelementptr i64, i64* %a, i64 0
-  %a.1 = getelementptr i64, i64* %a, i64 1
-  %b.0 = getelementptr i64, i64* %b, i64 0
-  %b.1 = getelementptr i64, i64* %b, i64 1
-  %c.0 = getelementptr i64, i64* %c, i64 0
-  %c.1 = getelementptr i64, i64* %c, i64 1
-  %v0.0 = load i64, i64* %a.0, align 8
-  %v0.1 = load i64, i64* %a.1, align 8
-  %v1.0 = load i64, i64* %b.0, align 8
-  %v1.1 = load i64, i64* %b.1, align 8
+  %a.1 = getelementptr i64, ptr %a, i64 1
+  %b.1 = getelementptr i64, ptr %b, i64 1
+  %c.1 = getelementptr i64, ptr %c, i64 1
+  %v0.0 = load i64, ptr %a, align 8
+  %v0.1 = load i64, ptr %a.1, align 8
+  %v1.0 = load i64, ptr %b, align 8
+  %v1.1 = load i64, ptr %b.1, align 8
   %tmp0.0 = add i64 %v0.0, %v1.0
   %tmp0.1 = add i64 %v0.1, %v1.1
   %tmp1.0 = sub i64 %v0.0, %v1.0
   %tmp1.1 = sub i64 %v0.1, %v1.1
   %tmp2.0 = add i64 %tmp0.0, %tmp0.1
   %tmp2.1 = add i64 %tmp1.0, %tmp1.1
-  store i64 %tmp2.0, i64* %c.0, align 8
-  store i64 %tmp2.1, i64* %c.1, align 8
+  store i64 %tmp2.0, ptr %c, align 8
+  store i64 %tmp2.1, ptr %c.1, align 8
   ret void
 }
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll
index 492389d3828fd..56f252bf64040 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll
@@ -42,7 +42,7 @@ define dso_local void @l() local_unnamed_addr {
 ; CHECK-NEXT:    [[I35:%.*]] = phi i32 [ [[I33]], [[BB25]] ]
 ; CHECK-NEXT:    br label [[BB36:%.*]]
 ; CHECK:       bb36:
-; CHECK-NEXT:    store i32 [[I35]], i32* @d, align 4
+; CHECK-NEXT:    store i32 [[I35]], ptr @d, align 4
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -95,6 +95,6 @@ bb34:                                             ; preds = %bb25
   br label %bb36
 
 bb36:                                             ; preds = %bb34
-  store i32 %i35, i32* @d, align 4
+  store i32 %i35, ptr @d, align 4
   ret void
 }

diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll
index e25022c05a057..31ddc782cee14 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll
@@ -15,16 +15,13 @@
 ; are not selected for vectorization. Instead we should vectorize with
 ; contiguous loads, from %a plus offsets 0 to 3, or offsets 1 to 4.
 
-define void @s116_modified(float* %a) {
+define void @s116_modified(ptr %a) {
 ; CHECK-LABEL: @s116_modified(
-; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 0
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds float, float* [[A]], i64 1
-; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds float, float* [[A]], i64 3
-; CHECK-NEXT:    [[LD0:%.*]] = load float, float* [[GEP0]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[GEP1]] to <2 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[GEP3]] to <2 x float>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 1
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 3
+; CHECK-NEXT:    [[LD0:%.*]] = load float, ptr [[A]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[GEP1]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, ptr [[GEP3]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> poison, float [[LD0]], i32 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 undef>
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
@@ -33,28 +30,26 @@ define void @s116_modified(float* %a) {
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> [[TMP8]], <4 x i32> <i32 0, i32 undef, i32 2, i32 4>
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP12:%.*]] = fmul fast <4 x float> [[TMP9]], [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP12]], <4 x float>* [[TMP13]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP12]], ptr [[A]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %gep0 = getelementptr inbounds float, float* %a, i64 0
-  %gep1 = getelementptr inbounds float, float* %a, i64 1
-  %gep2 = getelementptr inbounds float, float* %a, i64 2
-  %gep3 = getelementptr inbounds float, float* %a, i64 3
-  %gep4 = getelementptr inbounds float, float* %a, i64 4
-  %ld0 = load float, float* %gep0
-  %ld1 = load float, float* %gep1
-  %ld2 = load float, float* %gep2
-  %ld3 = load float, float* %gep3
-  %ld4 = load float, float* %gep4
+  %gep1 = getelementptr inbounds float, ptr %a, i64 1
+  %gep2 = getelementptr inbounds float, ptr %a, i64 2
+  %gep3 = getelementptr inbounds float, ptr %a, i64 3
+  %gep4 = getelementptr inbounds float, ptr %a, i64 4
+  %ld0 = load float, ptr %a
+  %ld1 = load float, ptr %gep1
+  %ld2 = load float, ptr %gep2
+  %ld3 = load float, ptr %gep3
+  %ld4 = load float, ptr %gep4
   %mul0 = fmul fast float %ld0, %ld1
   %mul1 = fmul fast float %ld2, %ld1
   %mul2 = fmul fast float %ld3, %ld2
   %mul3 = fmul fast float %ld4, %ld3
-  store float %mul0, float* %gep0
-  store float %mul1, float* %gep1
-  store float %mul2, float* %gep2
-  store float %mul3, float* %gep3
+  store float %mul0, ptr %a
+  store float %mul1, ptr %gep1
+  store float %mul2, ptr %gep2
+  store float %mul3, ptr %gep3
   ret void
 }
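
As the comment in tsc-s116.ll spells out, the profitable shape here is one contiguous vector load from %a (offsets 0 to 3, or 1 to 4) rather than a gather of individually loaded, overlapping elements. The contiguous shape, as a minimal hypothetical sketch separate from the test itself:

; one wide load covering %a[0..3], the form the cost model should pick
define <4 x float> @contiguous_load(ptr %a) {
  %v = load <4 x float>, ptr %a, align 4
  ret <4 x float> %v
}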
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s352.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s352.ll
index fc32e2ea8dc1f..70b7adfd6456e 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s352.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s352.ll
@@ -29,34 +29,30 @@ define i32 @s352() {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[DOT_115:%.*]] = phi float [ 0.000000e+00, [[PREHEADER]] ], [ [[ADD39:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA:%.*]], %struct.GlobalData* @global_data, i64 0, i32 0, i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], %struct.GlobalData* @global_data, i64 0, i32 3, i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[ARRAYIDX]] to <2 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX6]] to <2 x float>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA:%.*]], ptr @global_data, i64 0, i32 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], ptr @global_data, i64 0, i32 3, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX6]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x float> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[DOT_115]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
 ; CHECK-NEXT:    [[ADD15:%.*]] = fadd float [[ADD]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
-; CHECK-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], %struct.GlobalData* @global_data, i64 0, i32 0, i64 [[TMP7]]
-; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], %struct.GlobalData* @global_data, i64 0, i32 3, i64 [[TMP7]]
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast float* [[ARRAYIDX18]] to <2 x float>*
-; CHECK-NEXT:    [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[TMP8]], align 4
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast float* [[ARRAYIDX21]] to <2 x float>*
-; CHECK-NEXT:    [[TMP11:%.*]] = load <2 x float>, <2 x float>* [[TMP10]], align 4
+; CHECK-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], ptr @global_data, i64 0, i32 0, i64 [[TMP7]]
+; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], ptr @global_data, i64 0, i32 3, i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load <2 x float>, ptr [[ARRAYIDX18]], align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = load <2 x float>, ptr [[ARRAYIDX21]], align 4
 ; CHECK-NEXT:    [[TMP12:%.*]] = fmul <2 x float> [[TMP9]], [[TMP11]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x float> [[TMP12]], i32 0
 ; CHECK-NEXT:    [[ADD23:%.*]] = fadd float [[ADD15]], [[TMP13]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x float> [[TMP12]], i32 1
 ; CHECK-NEXT:    [[ADD31:%.*]] = fadd float [[ADD23]], [[TMP14]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4
-; CHECK-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], %struct.GlobalData* @global_data, i64 0, i32 0, i64 [[TMP15]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load float, float* [[ARRAYIDX34]], align 4
-; CHECK-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], %struct.GlobalData* @global_data, i64 0, i32 3, i64 [[TMP15]]
-; CHECK-NEXT:    [[TMP17:%.*]] = load float, float* [[ARRAYIDX37]], align 4
+; CHECK-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], ptr @global_data, i64 0, i32 0, i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX34]], align 4
+; CHECK-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], ptr @global_data, i64 0, i32 3, i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = load float, ptr [[ARRAYIDX37]], align 4
 ; CHECK-NEXT:    [[MUL38:%.*]] = fmul float [[TMP16]], [[TMP17]]
 ; CHECK-NEXT:    [[ADD39]] = fadd float [[ADD31]], [[MUL38]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 5
@@ -81,38 +77,38 @@ for.cond.cleanup3:
 for.body:
   %indvars.iv = phi i64 [ 0, %preheader ], [ %indvars.iv.next, %for.body ]
   %dot.115 = phi float [ 0.000000e+00, %preheader ], [ %add39, %for.body ]
-  %arrayidx = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 0, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx6 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 3, i64 %indvars.iv
-  %1 = load float, float* %arrayidx6, align 4
+  %arrayidx = getelementptr inbounds %struct.GlobalData, ptr @global_data, i64 0, i32 0, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx6 = getelementptr inbounds %struct.GlobalData, ptr @global_data, i64 0, i32 3, i64 %indvars.iv
+  %1 = load float, ptr %arrayidx6, align 4
   %mul7 = fmul float %0, %1
   %add = fadd float %dot.115, %mul7
   %2 = add nuw nsw i64 %indvars.iv, 1
-  %arrayidx10 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 0, i64 %2
-  %3 = load float, float* %arrayidx10, align 4
-  %arrayidx13 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 3, i64 %2
-  %4 = load float, float* %arrayidx13, align 4
+  %arrayidx10 = getelementptr inbounds %struct.GlobalData, ptr @global_data, i64 0, i32 0, i64 %2
+  %3 = load float, ptr %arrayidx10, align 4
+  %arrayidx13 = getelementptr inbounds %struct.GlobalData, ptr @global_data, i64 0, i32 3, i64 %2
+  %4 = load float, ptr %arrayidx13, align 4
   %mul14 = fmul float %3, %4
   %add15 = fadd float %add, %mul14
   %5 = add nuw nsw i64 %indvars.iv, 2
-  %arrayidx18 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 0, i64 %5
-  %6 = load float, float* %arrayidx18, align 4
-  %arrayidx21 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 3, i64 %5
-  %7 = load float, float* %arrayidx21, align 4
+  %arrayidx18 = getelementptr inbounds %struct.GlobalData, ptr @global_data, i64 0, i32 0, i64 %5
+  %6 = load float, ptr %arrayidx18, align 4
+  %arrayidx21 = getelementptr inbounds %struct.GlobalData, ptr @global_data, i64 0, i32 3, i64 %5
+  %7 = load float, ptr %arrayidx21, align 4
   %mul22 = fmul float %6, %7
   %add23 = fadd float %add15, %mul22
   %8 = add nuw nsw i64 %indvars.iv, 3
-  %arrayidx26 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 0, i64 %8
-  %9 = load float, float* %arrayidx26, align 4
-  %arrayidx29 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 3, i64 %8
-  %10 = load float, float* %arrayidx29, align 4
+  %arrayidx26 = getelementptr inbounds %struct.GlobalData, ptr @global_data, i64 0, i32 0, i64 %8
+  %9 = load float, ptr %arrayidx26, align 4
+  %arrayidx29 = getelementptr inbounds %struct.GlobalData, ptr @global_data, i64 0, i32 3, i64 %8
+  %10 = load float, ptr %arrayidx29, align 4
   %mul30 = fmul float %9, %10
   %add31 = fadd float %add23, %mul30
   %11 = add nuw nsw i64 %indvars.iv, 4
-  %arrayidx34 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 0, i64 %11
-  %12 = load float, float* %arrayidx34, align 4
-  %arrayidx37 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 3, i64 %11
-  %13 = load float, float* %arrayidx37, align 4
+  %arrayidx34 = getelementptr inbounds %struct.GlobalData, ptr @global_data, i64 0, i32 0, i64 %11
+  %12 = load float, ptr %arrayidx34, align 4
+  %arrayidx37 = getelementptr inbounds %struct.GlobalData, ptr @global_data, i64 0, i32 3, i64 %11
+  %13 = load float, ptr %arrayidx37, align 4
   %mul38 = fmul float %12, %13
   %add39 = fadd float %add31, %mul38
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5

diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll
index b1adeec0f636e..ddeaff9327e3d 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll
@@ -5,1059 +5,1009 @@
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-ios5.0.0"
 
-define void @select_umin_8xi16(i16* %ptr, i16 %x) {
+define void @select_umin_8xi16(ptr %ptr, i16 %x) {
 ; CHECK-LABEL: @select_umin_8xi16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[PTR:%.*]] to <8 x i16>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i16, i16* %ptr
+  %l.0 = load i16, ptr %ptr
   %cmp.0 = icmp ult i16 %l.0, 16383
   %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
-  store i16 %s.0, i16* %ptr, align 2
+  store i16 %s.0, ptr %ptr, align 2
 
-  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
-  %l.1 = load i16, i16* %gep.1
+  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
+  %l.1 = load i16, ptr %gep.1
   %cmp.1 = icmp ult i16 %l.1, 16383
   %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
-  store i16 %s.1, i16* %gep.1, align 2
+  store i16 %s.1, ptr %gep.1, align 2
 
-  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
-  %l.2 = load i16, i16* %gep.2
+  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
+  %l.2 = load i16, ptr %gep.2
   %cmp.2 = icmp ult i16 %l.2, 16383
   %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
-  store i16 %s.2, i16* %gep.2, align 2
+  store i16 %s.2, ptr %gep.2, align 2
 
-  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
-  %l.3 = load i16, i16* %gep.3
+  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
+  %l.3 = load i16, ptr %gep.3
   %cmp.3 = icmp ult i16 %l.3, 16383
   %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
-  store i16 %s.3, i16* %gep.3, align 2
+  store i16 %s.3, ptr %gep.3, align 2
 
-  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
-  %l.4 = load i16, i16* %gep.4
+  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
+  %l.4 = load i16, ptr %gep.4
   %cmp.4 = icmp ult i16 %l.4, 16383
   %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
-  store i16 %s.4, i16* %gep.4, align 2
+  store i16 %s.4, ptr %gep.4, align 2
 
-  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
-  %l.5 = load i16, i16* %gep.5
+  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
+  %l.5 = load i16, ptr %gep.5
   %cmp.5 = icmp ult i16 %l.5, 16383
   %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
-  store i16 %s.5, i16* %gep.5, align 2
+  store i16 %s.5, ptr %gep.5, align 2
 
-  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
-  %l.6 = load i16, i16* %gep.6
+  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
+  %l.6 = load i16, ptr %gep.6
   %cmp.6 = icmp ult i16 %l.6, 16383
   %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
-  store i16 %s.6, i16* %gep.6, align 2
+  store i16 %s.6, ptr %gep.6, align 2
 
-  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
-  %l.7 = load i16, i16* %gep.7
+  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
+  %l.7 = load i16, ptr %gep.7
   %cmp.7 = icmp ult i16 %l.7, 16383
   %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
-  store i16 %s.7, i16* %gep.7, align 2
+  store i16 %s.7, ptr %gep.7, align 2
   ret void
 }
 
-define void @select_umin_4xi32(i32* %ptr, i32 %x) {
+define void @select_umin_4xi32(ptr %ptr, i32 %x) {
 ; CHECK-LABEL: @select_umin_4xi32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i32, i32* %ptr
+  %l.0 = load i32, ptr %ptr
   %cmp.0 = icmp ult i32 %l.0, 16383
   %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
-  store i32 %s.0, i32* %ptr, align 4
+  store i32 %s.0, ptr %ptr, align 4
 
-  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
-  %l.1 = load i32, i32* %gep.1
+  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
+  %l.1 = load i32, ptr %gep.1
   %cmp.1 = icmp ult i32 %l.1, 16383
   %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
-  store i32 %s.1, i32* %gep.1, align 4
+  store i32 %s.1, ptr %gep.1, align 4
 
-  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
-  %l.2 = load i32, i32* %gep.2
+  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
+  %l.2 = load i32, ptr %gep.2
   %cmp.2 = icmp ult i32 %l.2, 16383
   %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
-  store i32 %s.2, i32* %gep.2, align 4
+  store i32 %s.2, ptr %gep.2, align 4
 
-  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
-  %l.3 = load i32, i32* %gep.3
+  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
+  %l.3 = load i32, ptr %gep.3
   %cmp.3 = icmp ult i32 %l.3, 16383
   %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
-  store i32 %s.3, i32* %gep.3, align 4
+  store i32 %s.3, ptr %gep.3, align 4
 
   ret void
 }
 
-define void @select_ule_ugt_mix_4xi32(i32* %ptr, i32 %x) {
+define void @select_ule_ugt_mix_4xi32(ptr %ptr, i32 %x) {
 ; CHECK-LABEL: @select_ule_ugt_mix_4xi32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ugt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
 ; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i32, i32* %ptr
+  %l.0 = load i32, ptr %ptr
   %cmp.0 = icmp ult i32 %l.0, 16383
   %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
-  store i32 %s.0, i32* %ptr, align 4
+  store i32 %s.0, ptr %ptr, align 4
 
-  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
-  %l.1 = load i32, i32* %gep.1
+  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
+  %l.1 = load i32, ptr %gep.1
   %cmp.1 = icmp ugt i32 %l.1, 16383
   %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
-  store i32 %s.1, i32* %gep.1, align 4
+  store i32 %s.1, ptr %gep.1, align 4
 
-  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
-  %l.2 = load i32, i32* %gep.2
+  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
+  %l.2 = load i32, ptr %gep.2
   %cmp.2 = icmp ult i32 %l.2, 16383
   %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
-  store i32 %s.2, i32* %gep.2, align 4
+  store i32 %s.2, ptr %gep.2, align 4
 
-  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
-  %l.3 = load i32, i32* %gep.3
+  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
+  %l.3 = load i32, ptr %gep.3
   %cmp.3 = icmp ugt i32 %l.3, 16383
   %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
-  store i32 %s.3, i32* %gep.3, align 4
+  store i32 %s.3, ptr %gep.3, align 4
 
   ret void
 }
 
 ; There is no <2 x i64> version of umin, but we can efficiently lower
 ; compare/select pairs with uniform predicates.
-define void @select_umin_2xi64(i64* %ptr, i64 %x) {
+define void @select_umin_2xi64(ptr %ptr, i64 %x) {
 ; CHECK-LABEL: @select_umin_2xi64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[PTR:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <2 x i64> [[TMP1]], <i64 16383, i64 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
+; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i64, i64* %ptr
+  %l.0 = load i64, ptr %ptr
   %cmp.0 = icmp ult i64 %l.0, 16383
   %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
-  store i64 %s.0, i64* %ptr, align 4
+  store i64 %s.0, ptr %ptr, align 4
 
-  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
-  %l.1 = load i64, i64* %gep.1
+  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
+  %l.1 = load i64, ptr %gep.1
   %cmp.1 = icmp ult i64 %l.1, 16383
   %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
-  store i64 %s.1, i64* %gep.1, align 4
+  store i64 %s.1, ptr %gep.1, align 4
 
   ret void
 }
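
; Illustration (hypothetical, not part of this test file): the compare/select
; idiom above with a uniform ult predicate is equivalent to the @llvm.umin
; intrinsic, which is why it lowers cheaply even though there is no dedicated
; <2 x i64> umin instruction.
define <2 x i64> @umin_equivalent(<2 x i64> %x) {
  %m = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %x, <2 x i64> <i64 16383, i64 16383>)
  ret <2 x i64> %m
}
declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>)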
 
 
-define void @select_umin_ule_8xi16(i16* %ptr, i16 %x) {
+define void @select_umin_ule_8xi16(ptr %ptr, i16 %x) {
 ; CHECK-LABEL: @select_umin_ule_8xi16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[PTR:%.*]] to <8 x i16>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i16, i16* %ptr
+  %l.0 = load i16, ptr %ptr
   %cmp.0 = icmp ule i16 %l.0, 16383
   %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
-  store i16 %s.0, i16* %ptr, align 2
+  store i16 %s.0, ptr %ptr, align 2
 
-  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
-  %l.1 = load i16, i16* %gep.1
+  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
+  %l.1 = load i16, ptr %gep.1
   %cmp.1 = icmp ule i16 %l.1, 16383
   %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
-  store i16 %s.1, i16* %gep.1, align 2
+  store i16 %s.1, ptr %gep.1, align 2
 
-  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
-  %l.2 = load i16, i16* %gep.2
+  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
+  %l.2 = load i16, ptr %gep.2
   %cmp.2 = icmp ule i16 %l.2, 16383
   %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
-  store i16 %s.2, i16* %gep.2, align 2
+  store i16 %s.2, ptr %gep.2, align 2
 
-  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
-  %l.3 = load i16, i16* %gep.3
+  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
+  %l.3 = load i16, ptr %gep.3
   %cmp.3 = icmp ule i16 %l.3, 16383
   %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
-  store i16 %s.3, i16* %gep.3, align 2
+  store i16 %s.3, ptr %gep.3, align 2
 
-  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
-  %l.4 = load i16, i16* %gep.4
+  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
+  %l.4 = load i16, ptr %gep.4
   %cmp.4 = icmp ule i16 %l.4, 16383
   %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
-  store i16 %s.4, i16* %gep.4, align 2
+  store i16 %s.4, ptr %gep.4, align 2
 
-  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
-  %l.5 = load i16, i16* %gep.5
+  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
+  %l.5 = load i16, ptr %gep.5
   %cmp.5 = icmp ule i16 %l.5, 16383
   %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
-  store i16 %s.5, i16* %gep.5, align 2
+  store i16 %s.5, ptr %gep.5, align 2
 
-  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
-  %l.6 = load i16, i16* %gep.6
+  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
+  %l.6 = load i16, ptr %gep.6
   %cmp.6 = icmp ule i16 %l.6, 16383
   %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
-  store i16 %s.6, i16* %gep.6, align 2
+  store i16 %s.6, ptr %gep.6, align 2
 
-  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
-  %l.7 = load i16, i16* %gep.7
+  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
+  %l.7 = load i16, ptr %gep.7
   %cmp.7 = icmp ule i16 %l.7, 16383
   %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
-  store i16 %s.7, i16* %gep.7, align 2
+  store i16 %s.7, ptr %gep.7, align 2
   ret void
 }
 
-define void @select_umin_ule_4xi32(i32* %ptr, i32 %x) {
+define void @select_umin_ule_4xi32(ptr %ptr, i32 %x) {
 ; CHECK-LABEL: @select_umin_ule_4xi32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i32, i32* %ptr
+  %l.0 = load i32, ptr %ptr
   %cmp.0 = icmp ule i32 %l.0, 16383
   %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
-  store i32 %s.0, i32* %ptr, align 4
+  store i32 %s.0, ptr %ptr, align 4
 
-  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
-  %l.1 = load i32, i32* %gep.1
+  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
+  %l.1 = load i32, ptr %gep.1
   %cmp.1 = icmp ule i32 %l.1, 16383
   %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
-  store i32 %s.1, i32* %gep.1, align 4
+  store i32 %s.1, ptr %gep.1, align 4
 
-  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
-  %l.2 = load i32, i32* %gep.2
+  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
+  %l.2 = load i32, ptr %gep.2
   %cmp.2 = icmp ule i32 %l.2, 16383
   %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
-  store i32 %s.2, i32* %gep.2, align 4
+  store i32 %s.2, ptr %gep.2, align 4
 
-  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
-  %l.3 = load i32, i32* %gep.3
+  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
+  %l.3 = load i32, ptr %gep.3
   %cmp.3 = icmp ule i32 %l.3, 16383
   %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
-  store i32 %s.3, i32* %gep.3, align 4
+  store i32 %s.3, ptr %gep.3, align 4
 
   ret void
 }
 
 ; There is no <2 x i64> version of umin, but we can efficiently lower
 ; compare/select pairs with uniform predicates.
-define void @select_umin_ule_2xi64(i64* %ptr, i64 %x) {
+define void @select_umin_ule_2xi64(ptr %ptr, i64 %x) {
 ; CHECK-LABEL: @select_umin_ule_2xi64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[PTR:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <2 x i64> [[TMP1]], <i64 16383, i64 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
+; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i64, i64* %ptr
+  %l.0 = load i64, ptr %ptr
   %cmp.0 = icmp ule i64 %l.0, 16383
   %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
-  store i64 %s.0, i64* %ptr, align 4
+  store i64 %s.0, ptr %ptr, align 4
 
-  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
-  %l.1 = load i64, i64* %gep.1
+  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
+  %l.1 = load i64, ptr %gep.1
   %cmp.1 = icmp ule i64 %l.1, 16383
   %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
-  store i64 %s.1, i64* %gep.1, align 4
+  store i64 %s.1, ptr %gep.1, align 4
 
   ret void
 }
 
-define void @select_smin_8xi16(i16* %ptr, i16 %x) {
+define void @select_smin_8xi16(ptr %ptr, i16 %x) {
 ; CHECK-LABEL: @select_smin_8xi16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[PTR:%.*]] to <8 x i16>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i16, i16* %ptr
+  %l.0 = load i16, ptr %ptr
   %cmp.0 = icmp slt i16 %l.0, 16383
   %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
-  store i16 %s.0, i16* %ptr, align 2
+  store i16 %s.0, ptr %ptr, align 2
 
-  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
-  %l.1 = load i16, i16* %gep.1
+  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
+  %l.1 = load i16, ptr %gep.1
   %cmp.1 = icmp slt i16 %l.1, 16383
   %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
-  store i16 %s.1, i16* %gep.1, align 2
+  store i16 %s.1, ptr %gep.1, align 2
 
-  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
-  %l.2 = load i16, i16* %gep.2
+  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
+  %l.2 = load i16, ptr %gep.2
   %cmp.2 = icmp slt i16 %l.2, 16383
   %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
-  store i16 %s.2, i16* %gep.2, align 2
+  store i16 %s.2, ptr %gep.2, align 2
 
-  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
-  %l.3 = load i16, i16* %gep.3
+  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
+  %l.3 = load i16, ptr %gep.3
   %cmp.3 = icmp slt i16 %l.3, 16383
   %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
-  store i16 %s.3, i16* %gep.3, align 2
+  store i16 %s.3, ptr %gep.3, align 2
 
-  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
-  %l.4 = load i16, i16* %gep.4
+  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
+  %l.4 = load i16, ptr %gep.4
   %cmp.4 = icmp slt i16 %l.4, 16383
   %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
-  store i16 %s.4, i16* %gep.4, align 2
+  store i16 %s.4, ptr %gep.4, align 2
 
-  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
-  %l.5 = load i16, i16* %gep.5
+  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
+  %l.5 = load i16, ptr %gep.5
   %cmp.5 = icmp slt i16 %l.5, 16383
   %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
-  store i16 %s.5, i16* %gep.5, align 2
+  store i16 %s.5, ptr %gep.5, align 2
 
-  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
-  %l.6 = load i16, i16* %gep.6
+  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
+  %l.6 = load i16, ptr %gep.6
   %cmp.6 = icmp slt i16 %l.6, 16383
   %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
-  store i16 %s.6, i16* %gep.6, align 2
+  store i16 %s.6, ptr %gep.6, align 2
 
-  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
-  %l.7 = load i16, i16* %gep.7
+  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
+  %l.7 = load i16, ptr %gep.7
   %cmp.7 = icmp slt i16 %l.7, 16383
   %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
-  store i16 %s.7, i16* %gep.7, align 2
+  store i16 %s.7, ptr %gep.7, align 2
   ret void
 }
 
-define void @select_smin_4xi32(i32* %ptr, i32 %x) {
+define void @select_smin_4xi32(ptr %ptr, i32 %x) {
 ; CHECK-LABEL: @select_smin_4xi32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i32, i32* %ptr
+  %l.0 = load i32, ptr %ptr
   %cmp.0 = icmp slt i32 %l.0, 16383
   %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
-  store i32 %s.0, i32* %ptr, align 4
+  store i32 %s.0, ptr %ptr, align 4
 
-  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
-  %l.1 = load i32, i32* %gep.1
+  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
+  %l.1 = load i32, ptr %gep.1
   %cmp.1 = icmp slt i32 %l.1, 16383
   %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
-  store i32 %s.1, i32* %gep.1, align 4
+  store i32 %s.1, ptr %gep.1, align 4
 
-  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
-  %l.2 = load i32, i32* %gep.2
+  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
+  %l.2 = load i32, ptr %gep.2
   %cmp.2 = icmp slt i32 %l.2, 16383
   %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
-  store i32 %s.2, i32* %gep.2, align 4
+  store i32 %s.2, ptr %gep.2, align 4
 
-  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
-  %l.3 = load i32, i32* %gep.3
+  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
+  %l.3 = load i32, ptr %gep.3
   %cmp.3 = icmp slt i32 %l.3, 16383
   %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
-  store i32 %s.3, i32* %gep.3, align 4
+  store i32 %s.3, ptr %gep.3, align 4
 
   ret void
 }
 
 ; There is no <2 x i64> version of smin, but we can efficiently lower
 ; compare/select pairs with uniform predicates.
-define void @select_smin_2xi64(i64* %ptr, i64 %x) {
+define void @select_smin_2xi64(ptr %ptr, i64 %x) {
 ; CHECK-LABEL: @select_smin_2xi64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[PTR:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <2 x i64> [[TMP1]], <i64 16383, i64 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
+; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i64, i64* %ptr
+  %l.0 = load i64, ptr %ptr
   %cmp.0 = icmp slt i64 %l.0, 16383
   %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
-  store i64 %s.0, i64* %ptr, align 4
+  store i64 %s.0, ptr %ptr, align 4
 
-  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
-  %l.1 = load i64, i64* %gep.1
+  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
+  %l.1 = load i64, ptr %gep.1
   %cmp.1 = icmp slt i64 %l.1, 16383
   %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
-  store i64 %s.1, i64* %gep.1, align 4
+  store i64 %s.1, ptr %gep.1, align 4
 
   ret void
 }
 
-define void @select_smin_sle_8xi16(i16* %ptr, i16 %x) {
+define void @select_smin_sle_8xi16(ptr %ptr, i16 %x) {
 ; CHECK-LABEL: @select_smin_sle_8xi16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[PTR:%.*]] to <8 x i16>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i16, i16* %ptr
+  %l.0 = load i16, ptr %ptr
   %cmp.0 = icmp sle i16 %l.0, 16383
   %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
-  store i16 %s.0, i16* %ptr, align 2
+  store i16 %s.0, ptr %ptr, align 2
 
-  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
-  %l.1 = load i16, i16* %gep.1
+  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
+  %l.1 = load i16, ptr %gep.1
   %cmp.1 = icmp sle i16 %l.1, 16383
   %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
-  store i16 %s.1, i16* %gep.1, align 2
+  store i16 %s.1, ptr %gep.1, align 2
 
-  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
-  %l.2 = load i16, i16* %gep.2
+  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
+  %l.2 = load i16, ptr %gep.2
   %cmp.2 = icmp sle i16 %l.2, 16383
   %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
-  store i16 %s.2, i16* %gep.2, align 2
+  store i16 %s.2, ptr %gep.2, align 2
 
-  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
-  %l.3 = load i16, i16* %gep.3
+  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
+  %l.3 = load i16, ptr %gep.3
   %cmp.3 = icmp sle i16 %l.3, 16383
   %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
-  store i16 %s.3, i16* %gep.3, align 2
+  store i16 %s.3, ptr %gep.3, align 2
 
-  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
-  %l.4 = load i16, i16* %gep.4
+  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
+  %l.4 = load i16, ptr %gep.4
   %cmp.4 = icmp sle i16 %l.4, 16383
   %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
-  store i16 %s.4, i16* %gep.4, align 2
+  store i16 %s.4, ptr %gep.4, align 2
 
-  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
-  %l.5 = load i16, i16* %gep.5
+  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
+  %l.5 = load i16, ptr %gep.5
   %cmp.5 = icmp sle i16 %l.5, 16383
   %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
-  store i16 %s.5, i16* %gep.5, align 2
+  store i16 %s.5, ptr %gep.5, align 2
 
-  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
-  %l.6 = load i16, i16* %gep.6
+  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
+  %l.6 = load i16, ptr %gep.6
   %cmp.6 = icmp sle i16 %l.6, 16383
   %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
-  store i16 %s.6, i16* %gep.6, align 2
+  store i16 %s.6, ptr %gep.6, align 2
 
-  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
-  %l.7 = load i16, i16* %gep.7
+  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
+  %l.7 = load i16, ptr %gep.7
   %cmp.7 = icmp sle i16 %l.7, 16383
   %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
-  store i16 %s.7, i16* %gep.7, align 2
+  store i16 %s.7, ptr %gep.7, align 2
   ret void
 }
 
-define void @select_smin_sle_4xi32(i32* %ptr, i32 %x) {
+define void @select_smin_sle_4xi32(ptr %ptr, i32 %x) {
 ; CHECK-LABEL: @select_smin_sle_4xi32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i32, i32* %ptr
+  %l.0 = load i32, ptr %ptr
   %cmp.0 = icmp sle i32 %l.0, 16383
   %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
-  store i32 %s.0, i32* %ptr, align 4
+  store i32 %s.0, ptr %ptr, align 4
 
-  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
-  %l.1 = load i32, i32* %gep.1
+  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
+  %l.1 = load i32, ptr %gep.1
   %cmp.1 = icmp sle i32 %l.1, 16383
   %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
-  store i32 %s.1, i32* %gep.1, align 4
+  store i32 %s.1, ptr %gep.1, align 4
 
-  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
-  %l.2 = load i32, i32* %gep.2
+  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
+  %l.2 = load i32, ptr %gep.2
   %cmp.2 = icmp sle i32 %l.2, 16383
   %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
-  store i32 %s.2, i32* %gep.2, align 4
+  store i32 %s.2, ptr %gep.2, align 4
 
-  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
-  %l.3 = load i32, i32* %gep.3
+  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
+  %l.3 = load i32, ptr %gep.3
   %cmp.3 = icmp sle i32 %l.3, 16383
   %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
-  store i32 %s.3, i32* %gep.3, align 4
+  store i32 %s.3, ptr %gep.3, align 4
 
   ret void
 }
 
 ; There is no <2 x i64> version of smin, but we can efficiently lower
 ; compare/select pairs with uniform predicates.
-define void @select_smin_sle_2xi64(i64* %ptr, i64 %x) {
+define void @select_smin_sle_2xi64(ptr %ptr, i64 %x) {
 ; CHECK-LABEL: @select_smin_sle_2xi64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[PTR:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <2 x i64> [[TMP1]], <i64 16383, i64 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
+; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i64, i64* %ptr
+  %l.0 = load i64, ptr %ptr
   %cmp.0 = icmp sle i64 %l.0, 16383
   %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
-  store i64 %s.0, i64* %ptr, align 4
+  store i64 %s.0, ptr %ptr, align 4
 
-  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
-  %l.1 = load i64, i64* %gep.1
+  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
+  %l.1 = load i64, ptr %gep.1
   %cmp.1 = icmp sle i64 %l.1, 16383
   %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
-  store i64 %s.1, i64* %gep.1, align 4
+  store i64 %s.1, ptr %gep.1, align 4
 
   ret void
 }
-define void @select_umax_8xi16(i16* %ptr, i16 %x) {
+define void @select_umax_8xi16(ptr %ptr, i16 %x) {
 ; CHECK-LABEL: @select_umax_8xi16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[PTR:%.*]] to <8 x i16>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i16, i16* %ptr
+  %l.0 = load i16, ptr %ptr
   %cmp.0 = icmp ugt i16 %l.0, 16383
   %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
-  store i16 %s.0, i16* %ptr, align 2
+  store i16 %s.0, ptr %ptr, align 2
 
-  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
-  %l.1 = load i16, i16* %gep.1
+  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
+  %l.1 = load i16, ptr %gep.1
   %cmp.1 = icmp ugt i16 %l.1, 16383
   %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
-  store i16 %s.1, i16* %gep.1, align 2
+  store i16 %s.1, ptr %gep.1, align 2
 
-  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
-  %l.2 = load i16, i16* %gep.2
+  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
+  %l.2 = load i16, ptr %gep.2
   %cmp.2 = icmp ugt i16 %l.2, 16383
   %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
-  store i16 %s.2, i16* %gep.2, align 2
+  store i16 %s.2, ptr %gep.2, align 2
 
-  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
-  %l.3 = load i16, i16* %gep.3
+  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
+  %l.3 = load i16, ptr %gep.3
   %cmp.3 = icmp ugt i16 %l.3, 16383
   %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
-  store i16 %s.3, i16* %gep.3, align 2
+  store i16 %s.3, ptr %gep.3, align 2
 
-  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
-  %l.4 = load i16, i16* %gep.4
+  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
+  %l.4 = load i16, ptr %gep.4
   %cmp.4 = icmp ugt i16 %l.4, 16383
   %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
-  store i16 %s.4, i16* %gep.4, align 2
+  store i16 %s.4, ptr %gep.4, align 2
 
-  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
-  %l.5 = load i16, i16* %gep.5
+  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
+  %l.5 = load i16, ptr %gep.5
   %cmp.5 = icmp ugt i16 %l.5, 16383
   %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
-  store i16 %s.5, i16* %gep.5, align 2
+  store i16 %s.5, ptr %gep.5, align 2
 
-  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
-  %l.6 = load i16, i16* %gep.6
+  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
+  %l.6 = load i16, ptr %gep.6
   %cmp.6 = icmp ugt i16 %l.6, 16383
   %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
-  store i16 %s.6, i16* %gep.6, align 2
+  store i16 %s.6, ptr %gep.6, align 2
 
-  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
-  %l.7 = load i16, i16* %gep.7
+  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
+  %l.7 = load i16, ptr %gep.7
   %cmp.7 = icmp ugt i16 %l.7, 16383
   %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
-  store i16 %s.7, i16* %gep.7, align 2
+  store i16 %s.7, ptr %gep.7, align 2
   ret void
 }
 
-define void @select_umax_4xi32(i32* %ptr, i32 %x) {
+define void @select_umax_4xi32(ptr %ptr, i32 %x) {
 ; CHECK-LABEL: @select_umax_4xi32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i32, i32* %ptr
+  %l.0 = load i32, ptr %ptr
   %cmp.0 = icmp ugt i32 %l.0, 16383
   %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
-  store i32 %s.0, i32* %ptr, align 4
+  store i32 %s.0, ptr %ptr, align 4
 
-  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
-  %l.1 = load i32, i32* %gep.1
+  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
+  %l.1 = load i32, ptr %gep.1
   %cmp.1 = icmp ugt i32 %l.1, 16383
   %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
-  store i32 %s.1, i32* %gep.1, align 4
+  store i32 %s.1, ptr %gep.1, align 4
 
-  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
-  %l.2 = load i32, i32* %gep.2
+  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
+  %l.2 = load i32, ptr %gep.2
   %cmp.2 = icmp ugt i32 %l.2, 16383
   %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
-  store i32 %s.2, i32* %gep.2, align 4
+  store i32 %s.2, ptr %gep.2, align 4
 
-  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
-  %l.3 = load i32, i32* %gep.3
+  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
+  %l.3 = load i32, ptr %gep.3
   %cmp.3 = icmp ugt i32 %l.3, 16383
   %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
-  store i32 %s.3, i32* %gep.3, align 4
+  store i32 %s.3, ptr %gep.3, align 4
 
   ret void
 }
 
 ; There is no <2 x i64> version of umax, but we can efficiently lower
 ; compare/select pairs with uniform predicates.
-define void @select_umax_2xi64(i64* %ptr, i64 %x) {
+define void @select_umax_2xi64(ptr %ptr, i64 %x) {
 ; CHECK-LABEL: @select_umax_2xi64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[PTR:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i64> [[TMP1]], <i64 16383, i64 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
+; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i64, i64* %ptr
+  %l.0 = load i64, ptr %ptr
   %cmp.0 = icmp ugt i64 %l.0, 16383
   %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
-  store i64 %s.0, i64* %ptr, align 4
+  store i64 %s.0, ptr %ptr, align 4
 
-  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
-  %l.1 = load i64, i64* %gep.1
+  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
+  %l.1 = load i64, ptr %gep.1
   %cmp.1 = icmp ugt i64 %l.1, 16383
   %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
-  store i64 %s.1, i64* %gep.1, align 4
+  store i64 %s.1, ptr %gep.1, align 4
 
   ret void
 }
 
-define void @select_umax_uge_8xi16(i16* %ptr, i16 %x) {
+define void @select_umax_uge_8xi16(ptr %ptr, i16 %x) {
 ; CHECK-LABEL: @select_umax_uge_8xi16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[PTR:%.*]] to <8 x i16>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i16, i16* %ptr
+  %l.0 = load i16, ptr %ptr
   %cmp.0 = icmp uge i16 %l.0, 16383
   %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
-  store i16 %s.0, i16* %ptr, align 2
+  store i16 %s.0, ptr %ptr, align 2
 
-  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
-  %l.1 = load i16, i16* %gep.1
+  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
+  %l.1 = load i16, ptr %gep.1
   %cmp.1 = icmp uge i16 %l.1, 16383
   %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
-  store i16 %s.1, i16* %gep.1, align 2
+  store i16 %s.1, ptr %gep.1, align 2
 
-  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
-  %l.2 = load i16, i16* %gep.2
+  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
+  %l.2 = load i16, ptr %gep.2
   %cmp.2 = icmp uge i16 %l.2, 16383
   %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
-  store i16 %s.2, i16* %gep.2, align 2
+  store i16 %s.2, ptr %gep.2, align 2
 
-  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
-  %l.3 = load i16, i16* %gep.3
+  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
+  %l.3 = load i16, ptr %gep.3
   %cmp.3 = icmp uge i16 %l.3, 16383
   %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
-  store i16 %s.3, i16* %gep.3, align 2
+  store i16 %s.3, ptr %gep.3, align 2
 
-  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
-  %l.4 = load i16, i16* %gep.4
+  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
+  %l.4 = load i16, ptr %gep.4
   %cmp.4 = icmp uge i16 %l.4, 16383
   %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
-  store i16 %s.4, i16* %gep.4, align 2
+  store i16 %s.4, ptr %gep.4, align 2
 
-  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
-  %l.5 = load i16, i16* %gep.5
+  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
+  %l.5 = load i16, ptr %gep.5
   %cmp.5 = icmp uge i16 %l.5, 16383
   %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
-  store i16 %s.5, i16* %gep.5, align 2
+  store i16 %s.5, ptr %gep.5, align 2
 
-  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
-  %l.6 = load i16, i16* %gep.6
+  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
+  %l.6 = load i16, ptr %gep.6
   %cmp.6 = icmp uge i16 %l.6, 16383
   %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
-  store i16 %s.6, i16* %gep.6, align 2
+  store i16 %s.6, ptr %gep.6, align 2
 
-  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
-  %l.7 = load i16, i16* %gep.7
+  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
+  %l.7 = load i16, ptr %gep.7
   %cmp.7 = icmp uge i16 %l.7, 16383
   %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
-  store i16 %s.7, i16* %gep.7, align 2
+  store i16 %s.7, ptr %gep.7, align 2
   ret void
 }
 
-define void @select_umax_uge_4xi32(i32* %ptr, i32 %x) {
+define void @select_umax_uge_4xi32(ptr %ptr, i32 %x) {
 ; CHECK-LABEL: @select_umax_uge_4xi32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i32, i32* %ptr
+  %l.0 = load i32, ptr %ptr
   %cmp.0 = icmp uge i32 %l.0, 16383
   %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
-  store i32 %s.0, i32* %ptr, align 4
+  store i32 %s.0, ptr %ptr, align 4
 
-  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
-  %l.1 = load i32, i32* %gep.1
+  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
+  %l.1 = load i32, ptr %gep.1
   %cmp.1 = icmp uge i32 %l.1, 16383
   %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
-  store i32 %s.1, i32* %gep.1, align 4
+  store i32 %s.1, ptr %gep.1, align 4
 
-  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
-  %l.2 = load i32, i32* %gep.2
+  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
+  %l.2 = load i32, ptr %gep.2
   %cmp.2 = icmp uge i32 %l.2, 16383
   %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
-  store i32 %s.2, i32* %gep.2, align 4
+  store i32 %s.2, ptr %gep.2, align 4
 
-  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
-  %l.3 = load i32, i32* %gep.3
+  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
+  %l.3 = load i32, ptr %gep.3
   %cmp.3 = icmp uge i32 %l.3, 16383
   %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
-  store i32 %s.3, i32* %gep.3, align 4
+  store i32 %s.3, ptr %gep.3, align 4
 
   ret void
 }
 
 ; There is no <2 x i64> version of umax, but we can efficiently lower
 ; compare/select pairs with uniform predicates.
-define void @select_umax_uge_2xi64(i64* %ptr, i64 %x) {
+define void @select_umax_uge_2xi64(ptr %ptr, i64 %x) {
 ; CHECK-LABEL: @select_umax_uge_2xi64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[PTR:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <2 x i64> [[TMP1]], <i64 16383, i64 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
+; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i64, i64* %ptr
+  %l.0 = load i64, ptr %ptr
   %cmp.0 = icmp uge i64 %l.0, 16383
   %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
-  store i64 %s.0, i64* %ptr, align 4
+  store i64 %s.0, ptr %ptr, align 4
 
-  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
-  %l.1 = load i64, i64* %gep.1
+  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
+  %l.1 = load i64, ptr %gep.1
   %cmp.1 = icmp uge i64 %l.1, 16383
   %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
-  store i64 %s.1, i64* %gep.1, align 4
+  store i64 %s.1, ptr %gep.1, align 4
 
   ret void
 }
 
-define void @select_smax_8xi16(i16* %ptr, i16 %x) {
+define void @select_smax_8xi16(ptr %ptr, i16 %x) {
 ; CHECK-LABEL: @select_smax_8xi16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[PTR:%.*]] to <8 x i16>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i16, i16* %ptr
+  %l.0 = load i16, ptr %ptr
   %cmp.0 = icmp sgt i16 %l.0, 16383
   %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
-  store i16 %s.0, i16* %ptr, align 2
+  store i16 %s.0, ptr %ptr, align 2
 
-  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
-  %l.1 = load i16, i16* %gep.1
+  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
+  %l.1 = load i16, ptr %gep.1
   %cmp.1 = icmp sgt i16 %l.1, 16383
   %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
-  store i16 %s.1, i16* %gep.1, align 2
+  store i16 %s.1, ptr %gep.1, align 2
 
-  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
-  %l.2 = load i16, i16* %gep.2
+  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
+  %l.2 = load i16, ptr %gep.2
   %cmp.2 = icmp sgt i16 %l.2, 16383
   %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
-  store i16 %s.2, i16* %gep.2, align 2
+  store i16 %s.2, ptr %gep.2, align 2
 
-  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
-  %l.3 = load i16, i16* %gep.3
+  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
+  %l.3 = load i16, ptr %gep.3
   %cmp.3 = icmp sgt i16 %l.3, 16383
   %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
-  store i16 %s.3, i16* %gep.3, align 2
+  store i16 %s.3, ptr %gep.3, align 2
 
-  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
-  %l.4 = load i16, i16* %gep.4
+  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
+  %l.4 = load i16, ptr %gep.4
   %cmp.4 = icmp sgt i16 %l.4, 16383
   %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
-  store i16 %s.4, i16* %gep.4, align 2
+  store i16 %s.4, ptr %gep.4, align 2
 
-  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
-  %l.5 = load i16, i16* %gep.5
+  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
+  %l.5 = load i16, ptr %gep.5
   %cmp.5 = icmp sgt i16 %l.5, 16383
   %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
-  store i16 %s.5, i16* %gep.5, align 2
+  store i16 %s.5, ptr %gep.5, align 2
 
-  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
-  %l.6 = load i16, i16* %gep.6
+  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
+  %l.6 = load i16, ptr %gep.6
   %cmp.6 = icmp sgt i16 %l.6, 16383
   %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
-  store i16 %s.6, i16* %gep.6, align 2
+  store i16 %s.6, ptr %gep.6, align 2
 
-  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
-  %l.7 = load i16, i16* %gep.7
+  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
+  %l.7 = load i16, ptr %gep.7
   %cmp.7 = icmp sgt i16 %l.7, 16383
   %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
-  store i16 %s.7, i16* %gep.7, align 2
+  store i16 %s.7, ptr %gep.7, align 2
   ret void
 }
 
-define void @select_smax_4xi32(i32* %ptr, i32 %x) {
+define void @select_smax_4xi32(ptr %ptr, i32 %x) {
 ; CHECK-LABEL: @select_smax_4xi32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i32, i32* %ptr
+  %l.0 = load i32, ptr %ptr
   %cmp.0 = icmp sgt i32 %l.0, 16383
   %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
-  store i32 %s.0, i32* %ptr, align 4
+  store i32 %s.0, ptr %ptr, align 4
 
-  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
-  %l.1 = load i32, i32* %gep.1
+  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
+  %l.1 = load i32, ptr %gep.1
   %cmp.1 = icmp sgt i32 %l.1, 16383
   %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
-  store i32 %s.1, i32* %gep.1, align 4
+  store i32 %s.1, ptr %gep.1, align 4
 
-  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
-  %l.2 = load i32, i32* %gep.2
+  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
+  %l.2 = load i32, ptr %gep.2
   %cmp.2 = icmp sgt i32 %l.2, 16383
   %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
-  store i32 %s.2, i32* %gep.2, align 4
+  store i32 %s.2, ptr %gep.2, align 4
 
-  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
-  %l.3 = load i32, i32* %gep.3
+  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
+  %l.3 = load i32, ptr %gep.3
   %cmp.3 = icmp sgt i32 %l.3, 16383
   %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
-  store i32 %s.3, i32* %gep.3, align 4
+  store i32 %s.3, ptr %gep.3, align 4
 
   ret void
 }
 
 ; There is no <2 x i64> version of smax, but we can efficiently lower
 ; compare/select pairs with uniform predicates.
-define void @select_smax_2xi64(i64* %ptr, i64 %x) {
+define void @select_smax_2xi64(ptr %ptr, i64 %x) {
 ; CHECK-LABEL: @select_smax_2xi64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[PTR:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <2 x i64> [[TMP1]], <i64 16383, i64 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
+; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i64, i64* %ptr
+  %l.0 = load i64, ptr %ptr
   %cmp.0 = icmp sgt i64 %l.0, 16383
   %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
-  store i64 %s.0, i64* %ptr, align 4
+  store i64 %s.0, ptr %ptr, align 4
 
-  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
-  %l.1 = load i64, i64* %gep.1
+  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
+  %l.1 = load i64, ptr %gep.1
   %cmp.1 = icmp sgt i64 %l.1, 16383
   %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
-  store i64 %s.1, i64* %gep.1, align 4
+  store i64 %s.1, ptr %gep.1, align 4
 
   ret void
 }
 
 
-define void @select_smax_sge_8xi16(i16* %ptr, i16 %x) {
+define void @select_smax_sge_8xi16(ptr %ptr, i16 %x) {
 ; CHECK-LABEL: @select_smax_sge_8xi16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[PTR:%.*]] to <8 x i16>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i16, i16* %ptr
+  %l.0 = load i16, ptr %ptr
   %cmp.0 = icmp sge i16 %l.0, 16383
   %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
-  store i16 %s.0, i16* %ptr, align 2
+  store i16 %s.0, ptr %ptr, align 2
 
-  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
-  %l.1 = load i16, i16* %gep.1
+  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
+  %l.1 = load i16, ptr %gep.1
   %cmp.1 = icmp sge i16 %l.1, 16383
   %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
-  store i16 %s.1, i16* %gep.1, align 2
+  store i16 %s.1, ptr %gep.1, align 2
 
-  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
-  %l.2 = load i16, i16* %gep.2
+  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
+  %l.2 = load i16, ptr %gep.2
   %cmp.2 = icmp sge i16 %l.2, 16383
   %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
-  store i16 %s.2, i16* %gep.2, align 2
+  store i16 %s.2, ptr %gep.2, align 2
 
-  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
-  %l.3 = load i16, i16* %gep.3
+  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
+  %l.3 = load i16, ptr %gep.3
   %cmp.3 = icmp sge i16 %l.3, 16383
   %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
-  store i16 %s.3, i16* %gep.3, align 2
+  store i16 %s.3, ptr %gep.3, align 2
 
-  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
-  %l.4 = load i16, i16* %gep.4
+  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
+  %l.4 = load i16, ptr %gep.4
   %cmp.4 = icmp sge i16 %l.4, 16383
   %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
-  store i16 %s.4, i16* %gep.4, align 2
+  store i16 %s.4, ptr %gep.4, align 2
 
-  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
-  %l.5 = load i16, i16* %gep.5
+  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
+  %l.5 = load i16, ptr %gep.5
   %cmp.5 = icmp sge i16 %l.5, 16383
   %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
-  store i16 %s.5, i16* %gep.5, align 2
+  store i16 %s.5, ptr %gep.5, align 2
 
-  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
-  %l.6 = load i16, i16* %gep.6
+  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
+  %l.6 = load i16, ptr %gep.6
   %cmp.6 = icmp sge i16 %l.6, 16383
   %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
-  store i16 %s.6, i16* %gep.6, align 2
+  store i16 %s.6, ptr %gep.6, align 2
 
-  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
-  %l.7 = load i16, i16* %gep.7
+  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
+  %l.7 = load i16, ptr %gep.7
   %cmp.7 = icmp sge i16 %l.7, 16383
   %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
-  store i16 %s.7, i16* %gep.7, align 2
+  store i16 %s.7, ptr %gep.7, align 2
   ret void
 }
 
-define void @select_smax_sge_4xi32(i32* %ptr, i32 %x) {
+define void @select_smax_sge_4xi32(ptr %ptr, i32 %x) {
 ; CHECK-LABEL: @select_smax_sge_4xi32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i32, i32* %ptr
+  %l.0 = load i32, ptr %ptr
   %cmp.0 = icmp sge i32 %l.0, 16383
   %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
-  store i32 %s.0, i32* %ptr, align 4
+  store i32 %s.0, ptr %ptr, align 4
 
-  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
-  %l.1 = load i32, i32* %gep.1
+  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
+  %l.1 = load i32, ptr %gep.1
   %cmp.1 = icmp sge i32 %l.1, 16383
   %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
-  store i32 %s.1, i32* %gep.1, align 4
+  store i32 %s.1, ptr %gep.1, align 4
 
-  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
-  %l.2 = load i32, i32* %gep.2
+  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
+  %l.2 = load i32, ptr %gep.2
   %cmp.2 = icmp sge i32 %l.2, 16383
   %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
-  store i32 %s.2, i32* %gep.2, align 4
+  store i32 %s.2, ptr %gep.2, align 4
 
-  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
-  %l.3 = load i32, i32* %gep.3
+  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
+  %l.3 = load i32, ptr %gep.3
   %cmp.3 = icmp sge i32 %l.3, 16383
   %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
-  store i32 %s.3, i32* %gep.3, align 4
+  store i32 %s.3, ptr %gep.3, align 4
 
   ret void
 }
 
 ; There is no <2 x i64> version of smax, but we can efficiently lower
 ; compare/select pairs with uniform predicates.
-define void @select_smax_sge_2xi64(i64* %ptr, i64 %x) {
+define void @select_smax_sge_2xi64(ptr %ptr, i64 %x) {
 ; CHECK-LABEL: @select_smax_sge_2xi64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[PTR:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge <2 x i64> [[TMP1]], <i64 16383, i64 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
+; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i64, i64* %ptr
+  %l.0 = load i64, ptr %ptr
   %cmp.0 = icmp sge i64 %l.0, 16383
   %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
-  store i64 %s.0, i64* %ptr, align 4
+  store i64 %s.0, ptr %ptr, align 4
 
-  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
-  %l.1 = load i64, i64* %gep.1
+  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
+  %l.1 = load i64, ptr %gep.1
   %cmp.1 = icmp sge i64 %l.1, 16383
   %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
-  store i64 %s.1, i64* %gep.1, align 4
+  store i64 %s.1, ptr %gep.1, align 4
 
   ret void
 }
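
(For reference, the conversion pattern is the same in every hunk of this
diff: with typed pointers a vector access needed an explicit bitcast, e.g.

    %0 = bitcast i32* %ptr to <4 x i32>*
    %1 = load <4 x i32>, <4 x i32>* %0, align 4

whereas with opaque pointers the cast disappears and the access takes the
pointer operand directly:

    %1 = load <4 x i32>, ptr %ptr, align 4

The loads, stores and getelementptrs below change in the same mechanical
way.)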

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll
index 7eedbe98d4fa9..02468f389f029 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll
@@ -9,180 +9,180 @@ target triple = "arm64-apple-ios5.0.0"
 
 ; We need selects with a uniform predicate to lower effectively to vector
 ; instructions.
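 ; The function below mixes ult, sgt, eq, ne and ule predicates across its
 ; eight lanes, so the CHECK lines expect it to stay scalar; the
 ; uniform-predicate functions under "Positive tests" further down do
 ; vectorize.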
-define void @select_mixed_predicates_8xi16(i16* %ptr, i16 %x) {
+define void @select_mixed_predicates_8xi16(ptr %ptr, i16 %x) {
 ; CHECK-LABEL: @select_mixed_predicates_8xi16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
+; CHECK-NEXT:    [[L_0:%.*]] = load i16, ptr [[PTR:%.*]], align 2
 ; CHECK-NEXT:    [[CMP_0:%.*]] = icmp ult i16 [[L_0]], 16383
 ; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 [[X:%.*]]
-; CHECK-NEXT:    store i16 [[S_0]], i16* [[PTR]], align 2
-; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
-; CHECK-NEXT:    [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
+; CHECK-NEXT:    store i16 [[S_0]], ptr [[PTR]], align 2
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i16 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i16, ptr [[GEP_1]], align 2
 ; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i16 [[L_1]], 16383
 ; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 [[X]]
-; CHECK-NEXT:    store i16 [[S_1]], i16* [[GEP_1]], align 2
-; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
-; CHECK-NEXT:    [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
+; CHECK-NEXT:    store i16 [[S_1]], ptr [[GEP_1]], align 2
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i16 2
+; CHECK-NEXT:    [[L_2:%.*]] = load i16, ptr [[GEP_2]], align 2
 ; CHECK-NEXT:    [[CMP_2:%.*]] = icmp eq i16 [[L_2]], 16383
 ; CHECK-NEXT:    [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 [[X]]
-; CHECK-NEXT:    store i16 [[S_2]], i16* [[GEP_2]], align 2
-; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
-; CHECK-NEXT:    [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
+; CHECK-NEXT:    store i16 [[S_2]], ptr [[GEP_2]], align 2
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i16 3
+; CHECK-NEXT:    [[L_3:%.*]] = load i16, ptr [[GEP_3]], align 2
 ; CHECK-NEXT:    [[CMP_3:%.*]] = icmp ne i16 [[L_3]], 16383
 ; CHECK-NEXT:    [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 [[X]]
-; CHECK-NEXT:    store i16 [[S_3]], i16* [[GEP_3]], align 2
-; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
-; CHECK-NEXT:    [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
+; CHECK-NEXT:    store i16 [[S_3]], ptr [[GEP_3]], align 2
+; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i16 4
+; CHECK-NEXT:    [[L_4:%.*]] = load i16, ptr [[GEP_4]], align 2
 ; CHECK-NEXT:    [[CMP_4:%.*]] = icmp eq i16 [[L_4]], 16383
 ; CHECK-NEXT:    [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 [[X]]
-; CHECK-NEXT:    store i16 [[S_4]], i16* [[GEP_4]], align 2
-; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
-; CHECK-NEXT:    [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
+; CHECK-NEXT:    store i16 [[S_4]], ptr [[GEP_4]], align 2
+; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i16 5
+; CHECK-NEXT:    [[L_5:%.*]] = load i16, ptr [[GEP_5]], align 2
 ; CHECK-NEXT:    [[CMP_5:%.*]] = icmp ule i16 [[L_5]], 16383
 ; CHECK-NEXT:    [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 [[X]]
-; CHECK-NEXT:    store i16 [[S_5]], i16* [[GEP_5]], align 2
-; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
-; CHECK-NEXT:    [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
+; CHECK-NEXT:    store i16 [[S_5]], ptr [[GEP_5]], align 2
+; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i16 6
+; CHECK-NEXT:    [[L_6:%.*]] = load i16, ptr [[GEP_6]], align 2
 ; CHECK-NEXT:    [[CMP_6:%.*]] = icmp ult i16 [[L_6]], 16383
 ; CHECK-NEXT:    [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 [[X]]
-; CHECK-NEXT:    store i16 [[S_6]], i16* [[GEP_6]], align 2
-; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
-; CHECK-NEXT:    [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
+; CHECK-NEXT:    store i16 [[S_6]], ptr [[GEP_6]], align 2
+; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i16 7
+; CHECK-NEXT:    [[L_7:%.*]] = load i16, ptr [[GEP_7]], align 2
 ; CHECK-NEXT:    [[CMP_7:%.*]] = icmp ult i16 [[L_7]], 16383
 ; CHECK-NEXT:    [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 [[X]]
-; CHECK-NEXT:    store i16 [[S_7]], i16* [[GEP_7]], align 2
+; CHECK-NEXT:    store i16 [[S_7]], ptr [[GEP_7]], align 2
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i16, i16* %ptr
+  %l.0 = load i16, ptr %ptr
   %cmp.0 = icmp ult i16 %l.0, 16383
   %s.0 = select i1 %cmp.0, i16 %l.0, i16 %x
-  store i16 %s.0, i16* %ptr, align 2
+  store i16 %s.0, ptr %ptr, align 2
 
-  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
-  %l.1 = load i16, i16* %gep.1
+  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
+  %l.1 = load i16, ptr %gep.1
   %cmp.1 = icmp sgt i16 %l.1, 16383
   %s.1 = select i1 %cmp.1, i16 %l.1, i16 %x
-  store i16 %s.1, i16* %gep.1, align 2
+  store i16 %s.1, ptr %gep.1, align 2
 
-  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
-  %l.2 = load i16, i16* %gep.2
+  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
+  %l.2 = load i16, ptr %gep.2
   %cmp.2 = icmp eq i16 %l.2, 16383
   %s.2 = select i1 %cmp.2, i16 %l.2, i16 %x
-  store i16 %s.2, i16* %gep.2, align 2
+  store i16 %s.2, ptr %gep.2, align 2
 
-  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
-  %l.3 = load i16, i16* %gep.3
+  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
+  %l.3 = load i16, ptr %gep.3
   %cmp.3 = icmp ne i16 %l.3, 16383
   %s.3 = select i1 %cmp.3, i16 %l.3, i16 %x
-  store i16 %s.3, i16* %gep.3, align 2
+  store i16 %s.3, ptr %gep.3, align 2
 
-  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
-  %l.4 = load i16, i16* %gep.4
+  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
+  %l.4 = load i16, ptr %gep.4
   %cmp.4 = icmp eq i16 %l.4, 16383
   %s.4 = select i1 %cmp.4, i16 %l.4, i16 %x
-  store i16 %s.4, i16* %gep.4, align 2
+  store i16 %s.4, ptr %gep.4, align 2
 
-  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
-  %l.5 = load i16, i16* %gep.5
+  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
+  %l.5 = load i16, ptr %gep.5
   %cmp.5 = icmp ule i16 %l.5, 16383
   %s.5 = select i1 %cmp.5, i16 %l.5, i16 %x
-  store i16 %s.5, i16* %gep.5, align 2
+  store i16 %s.5, ptr %gep.5, align 2
 
-  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
-  %l.6 = load i16, i16* %gep.6
+  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
+  %l.6 = load i16, ptr %gep.6
   %cmp.6 = icmp ult i16 %l.6, 16383
   %s.6 = select i1 %cmp.6, i16 %l.6, i16 %x
-  store i16 %s.6, i16* %gep.6, align 2
+  store i16 %s.6, ptr %gep.6, align 2
 
-  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
-  %l.7 = load i16, i16* %gep.7
+  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
+  %l.7 = load i16, ptr %gep.7
   %cmp.7 = icmp ult i16 %l.7, 16383
   %s.7 = select i1 %cmp.7, i16 %l.7, i16 %x
-  store i16 %s.7, i16* %gep.7, align 2
+  store i16 %s.7, ptr %gep.7, align 2
   ret void
 }
 
-define void @select_uniform_ugt_7xi8(i8* %ptr, i8 %x) {
+define void @select_uniform_ugt_7xi8(ptr %ptr, i8 %x) {
 ; CHECK-LABEL: @select_uniform_ugt_7xi8(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[L_0:%.*]] = load i8, i8* [[PTR:%.*]], align 1
+; CHECK-NEXT:    [[L_0:%.*]] = load i8, ptr [[PTR:%.*]], align 1
 ; CHECK-NEXT:    [[CMP_0:%.*]] = icmp ugt i8 [[L_0]], -1
 ; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i8 [[L_0]], i8 [[X:%.*]]
-; CHECK-NEXT:    store i8 [[S_0]], i8* [[PTR]], align 2
-; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 1
-; CHECK-NEXT:    [[L_1:%.*]] = load i8, i8* [[GEP_1]], align 1
+; CHECK-NEXT:    store i8 [[S_0]], ptr [[PTR]], align 2
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i8 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i8, ptr [[GEP_1]], align 1
 ; CHECK-NEXT:    [[CMP_1:%.*]] = icmp ugt i8 [[L_1]], -1
 ; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i8 [[L_1]], i8 [[X]]
-; CHECK-NEXT:    store i8 [[S_1]], i8* [[GEP_1]], align 2
-; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 2
-; CHECK-NEXT:    [[L_2:%.*]] = load i8, i8* [[GEP_2]], align 1
+; CHECK-NEXT:    store i8 [[S_1]], ptr [[GEP_1]], align 2
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i8 2
+; CHECK-NEXT:    [[L_2:%.*]] = load i8, ptr [[GEP_2]], align 1
 ; CHECK-NEXT:    [[CMP_2:%.*]] = icmp ugt i8 [[L_2]], -1
 ; CHECK-NEXT:    [[S_2:%.*]] = select i1 [[CMP_2]], i8 [[L_2]], i8 [[X]]
-; CHECK-NEXT:    store i8 [[S_2]], i8* [[GEP_2]], align 2
-; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 3
-; CHECK-NEXT:    [[L_3:%.*]] = load i8, i8* [[GEP_3]], align 1
+; CHECK-NEXT:    store i8 [[S_2]], ptr [[GEP_2]], align 2
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i8 3
+; CHECK-NEXT:    [[L_3:%.*]] = load i8, ptr [[GEP_3]], align 1
 ; CHECK-NEXT:    [[CMP_3:%.*]] = icmp ugt i8 [[L_3]], -1
 ; CHECK-NEXT:    [[S_3:%.*]] = select i1 [[CMP_3]], i8 [[L_3]], i8 [[X]]
-; CHECK-NEXT:    store i8 [[S_3]], i8* [[GEP_3]], align 2
-; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 4
-; CHECK-NEXT:    [[L_4:%.*]] = load i8, i8* [[GEP_4]], align 1
+; CHECK-NEXT:    store i8 [[S_3]], ptr [[GEP_3]], align 2
+; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i8 4
+; CHECK-NEXT:    [[L_4:%.*]] = load i8, ptr [[GEP_4]], align 1
 ; CHECK-NEXT:    [[CMP_4:%.*]] = icmp ugt i8 [[L_4]], -1
 ; CHECK-NEXT:    [[S_4:%.*]] = select i1 [[CMP_4]], i8 [[L_4]], i8 [[X]]
-; CHECK-NEXT:    store i8 [[S_4]], i8* [[GEP_4]], align 2
-; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 5
-; CHECK-NEXT:    [[L_5:%.*]] = load i8, i8* [[GEP_5]], align 1
+; CHECK-NEXT:    store i8 [[S_4]], ptr [[GEP_4]], align 2
+; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i8 5
+; CHECK-NEXT:    [[L_5:%.*]] = load i8, ptr [[GEP_5]], align 1
 ; CHECK-NEXT:    [[CMP_5:%.*]] = icmp ugt i8 [[L_5]], -1
 ; CHECK-NEXT:    [[S_5:%.*]] = select i1 [[CMP_5]], i8 [[L_5]], i8 [[X]]
-; CHECK-NEXT:    store i8 [[S_5]], i8* [[GEP_5]], align 2
-; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 6
-; CHECK-NEXT:    [[L_6:%.*]] = load i8, i8* [[GEP_6]], align 1
+; CHECK-NEXT:    store i8 [[S_5]], ptr [[GEP_5]], align 2
+; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i8 6
+; CHECK-NEXT:    [[L_6:%.*]] = load i8, ptr [[GEP_6]], align 1
 ; CHECK-NEXT:    [[CMP_6:%.*]] = icmp ugt i8 [[L_6]], -1
 ; CHECK-NEXT:    [[S_6:%.*]] = select i1 [[CMP_6]], i8 [[L_6]], i8 [[X]]
-; CHECK-NEXT:    store i8 [[S_6]], i8* [[GEP_6]], align 2
+; CHECK-NEXT:    store i8 [[S_6]], ptr [[GEP_6]], align 2
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i8, i8* %ptr
+  %l.0 = load i8, ptr %ptr
   %cmp.0 = icmp ugt i8 %l.0, 16383
   %s.0 = select i1 %cmp.0, i8 %l.0, i8 %x
-  store i8 %s.0, i8* %ptr, align 2
+  store i8 %s.0, ptr %ptr, align 2
 
-  %gep.1 = getelementptr inbounds i8, i8* %ptr, i8 1
-  %l.1 = load i8, i8* %gep.1
+  %gep.1 = getelementptr inbounds i8, ptr %ptr, i8 1
+  %l.1 = load i8, ptr %gep.1
   %cmp.1 = icmp ugt i8 %l.1, 16383
   %s.1 = select i1 %cmp.1, i8 %l.1, i8 %x
-  store i8 %s.1, i8* %gep.1, align 2
+  store i8 %s.1, ptr %gep.1, align 2
 
-  %gep.2 = getelementptr inbounds i8, i8* %ptr, i8 2
-  %l.2 = load i8, i8* %gep.2
+  %gep.2 = getelementptr inbounds i8, ptr %ptr, i8 2
+  %l.2 = load i8, ptr %gep.2
   %cmp.2 = icmp ugt i8 %l.2, 16383
   %s.2 = select i1 %cmp.2, i8 %l.2, i8 %x
-  store i8 %s.2, i8* %gep.2, align 2
+  store i8 %s.2, ptr %gep.2, align 2
 
-  %gep.3 = getelementptr inbounds i8, i8* %ptr, i8 3
-  %l.3 = load i8, i8* %gep.3
+  %gep.3 = getelementptr inbounds i8, ptr %ptr, i8 3
+  %l.3 = load i8, ptr %gep.3
   %cmp.3 = icmp ugt i8 %l.3, 16383
   %s.3 = select i1 %cmp.3, i8 %l.3, i8 %x
-  store i8 %s.3, i8* %gep.3, align 2
+  store i8 %s.3, ptr %gep.3, align 2
 
-  %gep.4 = getelementptr inbounds i8, i8* %ptr, i8 4
-  %l.4 = load i8, i8* %gep.4
+  %gep.4 = getelementptr inbounds i8, ptr %ptr, i8 4
+  %l.4 = load i8, ptr %gep.4
   %cmp.4 = icmp ugt i8 %l.4, 16383
   %s.4 = select i1 %cmp.4, i8 %l.4, i8 %x
-  store i8 %s.4, i8* %gep.4, align 2
+  store i8 %s.4, ptr %gep.4, align 2
 
-  %gep.5 = getelementptr inbounds i8, i8* %ptr, i8 5
-  %l.5 = load i8, i8* %gep.5
+  %gep.5 = getelementptr inbounds i8, ptr %ptr, i8 5
+  %l.5 = load i8, ptr %gep.5
   %cmp.5 = icmp ugt i8 %l.5, 16383
   %s.5 = select i1 %cmp.5, i8 %l.5, i8 %x
-  store i8 %s.5, i8* %gep.5, align 2
+  store i8 %s.5, ptr %gep.5, align 2
 
-  %gep.6 = getelementptr inbounds i8, i8* %ptr, i8 6
-  %l.6 = load i8, i8* %gep.6
+  %gep.6 = getelementptr inbounds i8, ptr %ptr, i8 6
+  %l.6 = load i8, ptr %gep.6
   %cmp.6 = icmp ugt i8 %l.6, 16383
   %s.6 = select i1 %cmp.6, i8 %l.6, i8 %x
-  store i8 %s.6, i8* %gep.6, align 2
+  store i8 %s.6, ptr %gep.6, align 2
 
   ret void
 }
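 ; Note that the seven-lane function above remains scalar in its CHECK lines,
 ; likely because a seven-wide store group is not a power of two, while the
 ; eight-lane variant below vectorizes.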
@@ -190,418 +190,404 @@ entry:
 
 ; Positive tests.
 
-define void @select_uniform_ugt_8xi8(i8* %ptr, i8 %x) {
+define void @select_uniform_ugt_8xi8(ptr %ptr, i8 %x) {
 ; CHECK-LABEL: @select_uniform_ugt_8xi8(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[PTR:%.*]] to <8 x i8>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[PTR:%.*]], align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[X:%.*]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = select <8 x i1> [[TMP2]], <8 x i8> [[TMP1]], <8 x i8> [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[PTR]] to <8 x i8>*
-; CHECK-NEXT:    store <8 x i8> [[TMP4]], <8 x i8>* [[TMP5]], align 2
+; CHECK-NEXT:    store <8 x i8> [[TMP4]], ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i8, i8* %ptr
+  %l.0 = load i8, ptr %ptr
   %cmp.0 = icmp ugt i8 %l.0, 16383
   %s.0 = select i1 %cmp.0, i8 %l.0, i8 %x
-  store i8 %s.0, i8* %ptr, align 2
+  store i8 %s.0, ptr %ptr, align 2
 
-  %gep.1 = getelementptr inbounds i8, i8* %ptr, i8 1
-  %l.1 = load i8, i8* %gep.1
+  %gep.1 = getelementptr inbounds i8, ptr %ptr, i8 1
+  %l.1 = load i8, ptr %gep.1
   %cmp.1 = icmp ugt i8 %l.1, 16383
   %s.1 = select i1 %cmp.1, i8 %l.1, i8 %x
-  store i8 %s.1, i8* %gep.1, align 2
+  store i8 %s.1, ptr %gep.1, align 2
 
-  %gep.2 = getelementptr inbounds i8, i8* %ptr, i8 2
-  %l.2 = load i8, i8* %gep.2
+  %gep.2 = getelementptr inbounds i8, ptr %ptr, i8 2
+  %l.2 = load i8, ptr %gep.2
   %cmp.2 = icmp ugt i8 %l.2, 16383
   %s.2 = select i1 %cmp.2, i8 %l.2, i8 %x
-  store i8 %s.2, i8* %gep.2, align 2
+  store i8 %s.2, ptr %gep.2, align 2
 
-  %gep.3 = getelementptr inbounds i8, i8* %ptr, i8 3
-  %l.3 = load i8, i8* %gep.3
+  %gep.3 = getelementptr inbounds i8, ptr %ptr, i8 3
+  %l.3 = load i8, ptr %gep.3
   %cmp.3 = icmp ugt i8 %l.3, 16383
   %s.3 = select i1 %cmp.3, i8 %l.3, i8 %x
-  store i8 %s.3, i8* %gep.3, align 2
+  store i8 %s.3, ptr %gep.3, align 2
 
-  %gep.4 = getelementptr inbounds i8, i8* %ptr, i8 4
-  %l.4 = load i8, i8* %gep.4
+  %gep.4 = getelementptr inbounds i8, ptr %ptr, i8 4
+  %l.4 = load i8, ptr %gep.4
   %cmp.4 = icmp ugt i8 %l.4, 16383
   %s.4 = select i1 %cmp.4, i8 %l.4, i8 %x
-  store i8 %s.4, i8* %gep.4, align 2
+  store i8 %s.4, ptr %gep.4, align 2
 
-  %gep.5 = getelementptr inbounds i8, i8* %ptr, i8 5
-  %l.5 = load i8, i8* %gep.5
+  %gep.5 = getelementptr inbounds i8, ptr %ptr, i8 5
+  %l.5 = load i8, ptr %gep.5
   %cmp.5 = icmp ugt i8 %l.5, 16383
   %s.5 = select i1 %cmp.5, i8 %l.5, i8 %x
-  store i8 %s.5, i8* %gep.5, align 2
+  store i8 %s.5, ptr %gep.5, align 2
 
-  %gep.6 = getelementptr inbounds i8, i8* %ptr, i8 6
-  %l.6 = load i8, i8* %gep.6
+  %gep.6 = getelementptr inbounds i8, ptr %ptr, i8 6
+  %l.6 = load i8, ptr %gep.6
   %cmp.6 = icmp ugt i8 %l.6, 16383
   %s.6 = select i1 %cmp.6, i8 %l.6, i8 %x
-  store i8 %s.6, i8* %gep.6, align 2
+  store i8 %s.6, ptr %gep.6, align 2
 
-  %gep.7 = getelementptr inbounds i8, i8* %ptr, i8 7
-  %l.7 = load i8, i8* %gep.7
+  %gep.7 = getelementptr inbounds i8, ptr %ptr, i8 7
+  %l.7 = load i8, ptr %gep.7
   %cmp.7 = icmp ugt i8 %l.7, 16383
   %s.7 = select i1 %cmp.7, i8 %l.7, i8 %x
-  store i8 %s.7, i8* %gep.7, align 2
+  store i8 %s.7, ptr %gep.7, align 2
   ret void
 }
 
-define void @select_uniform_ugt_16xi8(i8* %ptr, i8 %x) {
+define void @select_uniform_ugt_16xi8(ptr %ptr, i8 %x) {
 ; CHECK-LABEL: @select_uniform_ugt_16xi8(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[PTR:%.*]] to <8 x i8>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[PTR:%.*]], align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[X:%.*]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = select <8 x i1> [[TMP2]], <8 x i8> [[TMP1]], <8 x i8> [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[PTR]] to <8 x i8>*
-; CHECK-NEXT:    store <8 x i8> [[TMP4]], <8 x i8>* [[TMP5]], align 2
-; CHECK-NEXT:    [[GEP_8:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 8
-; CHECK-NEXT:    [[L_8:%.*]] = load i8, i8* [[GEP_8]], align 1
+; CHECK-NEXT:    store <8 x i8> [[TMP4]], ptr [[PTR]], align 2
+; CHECK-NEXT:    [[GEP_8:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i8 8
+; CHECK-NEXT:    [[L_8:%.*]] = load i8, ptr [[GEP_8]], align 1
 ; CHECK-NEXT:    [[CMP_8:%.*]] = icmp ugt i8 [[L_8]], -1
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <8 x i8> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[S_8:%.*]] = select i1 [[CMP_8]], i8 [[TMP6]], i8 [[X]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x i8> [[TMP4]], i32 0
-; CHECK-NEXT:    store i8 [[TMP7]], i8* [[GEP_8]], align 2
-; CHECK-NEXT:    [[GEP_9:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 9
-; CHECK-NEXT:    [[L_9:%.*]] = load i8, i8* [[GEP_9]], align 1
+; CHECK-NEXT:    store i8 [[TMP7]], ptr [[GEP_8]], align 2
+; CHECK-NEXT:    [[GEP_9:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i8 9
+; CHECK-NEXT:    [[L_9:%.*]] = load i8, ptr [[GEP_9]], align 1
 ; CHECK-NEXT:    [[CMP_9:%.*]] = icmp ugt i8 [[L_9]], -1
 ; CHECK-NEXT:    [[S_9:%.*]] = select i1 [[CMP_9]], i8 [[L_9]], i8 [[X]]
-; CHECK-NEXT:    store i8 [[S_9]], i8* [[GEP_9]], align 2
-; CHECK-NEXT:    [[GEP_10:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 10
-; CHECK-NEXT:    [[L_10:%.*]] = load i8, i8* [[GEP_10]], align 1
+; CHECK-NEXT:    store i8 [[S_9]], ptr [[GEP_9]], align 2
+; CHECK-NEXT:    [[GEP_10:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i8 10
+; CHECK-NEXT:    [[L_10:%.*]] = load i8, ptr [[GEP_10]], align 1
 ; CHECK-NEXT:    [[CMP_10:%.*]] = icmp ugt i8 [[L_10]], -1
 ; CHECK-NEXT:    [[S_10:%.*]] = select i1 [[CMP_10]], i8 [[L_10]], i8 [[X]]
-; CHECK-NEXT:    store i8 [[S_10]], i8* [[GEP_10]], align 2
-; CHECK-NEXT:    [[GEP_11:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 11
-; CHECK-NEXT:    [[L_11:%.*]] = load i8, i8* [[GEP_11]], align 1
+; CHECK-NEXT:    store i8 [[S_10]], ptr [[GEP_10]], align 2
+; CHECK-NEXT:    [[GEP_11:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i8 11
+; CHECK-NEXT:    [[L_11:%.*]] = load i8, ptr [[GEP_11]], align 1
 ; CHECK-NEXT:    [[CMP_11:%.*]] = icmp ugt i8 [[L_11]], -1
 ; CHECK-NEXT:    [[S_11:%.*]] = select i1 [[CMP_11]], i8 [[L_11]], i8 [[X]]
-; CHECK-NEXT:    store i8 [[S_11]], i8* [[GEP_11]], align 2
-; CHECK-NEXT:    [[GEP_12:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 12
-; CHECK-NEXT:    [[L_12:%.*]] = load i8, i8* [[GEP_12]], align 1
+; CHECK-NEXT:    store i8 [[S_11]], ptr [[GEP_11]], align 2
+; CHECK-NEXT:    [[GEP_12:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i8 12
+; CHECK-NEXT:    [[L_12:%.*]] = load i8, ptr [[GEP_12]], align 1
 ; CHECK-NEXT:    [[CMP_12:%.*]] = icmp ugt i8 [[L_12]], -1
 ; CHECK-NEXT:    [[S_12:%.*]] = select i1 [[CMP_12]], i8 [[L_12]], i8 [[X]]
-; CHECK-NEXT:    store i8 [[S_12]], i8* [[GEP_12]], align 2
-; CHECK-NEXT:    [[GEP_13:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 13
-; CHECK-NEXT:    [[L_13:%.*]] = load i8, i8* [[GEP_13]], align 1
+; CHECK-NEXT:    store i8 [[S_12]], ptr [[GEP_12]], align 2
+; CHECK-NEXT:    [[GEP_13:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i8 13
+; CHECK-NEXT:    [[L_13:%.*]] = load i8, ptr [[GEP_13]], align 1
 ; CHECK-NEXT:    [[CMP_13:%.*]] = icmp ugt i8 [[L_13]], -1
 ; CHECK-NEXT:    [[S_13:%.*]] = select i1 [[CMP_13]], i8 [[L_13]], i8 [[X]]
-; CHECK-NEXT:    store i8 [[S_13]], i8* [[GEP_13]], align 2
-; CHECK-NEXT:    [[GEP_14:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 14
-; CHECK-NEXT:    [[L_14:%.*]] = load i8, i8* [[GEP_14]], align 1
+; CHECK-NEXT:    store i8 [[S_13]], ptr [[GEP_13]], align 2
+; CHECK-NEXT:    [[GEP_14:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i8 14
+; CHECK-NEXT:    [[L_14:%.*]] = load i8, ptr [[GEP_14]], align 1
 ; CHECK-NEXT:    [[CMP_14:%.*]] = icmp ugt i8 [[L_14]], -1
 ; CHECK-NEXT:    [[S_14:%.*]] = select i1 [[CMP_14]], i8 [[L_14]], i8 [[X]]
-; CHECK-NEXT:    store i8 [[S_14]], i8* [[GEP_14]], align 2
-; CHECK-NEXT:    [[GEP_15:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 15
-; CHECK-NEXT:    [[L_15:%.*]] = load i8, i8* [[GEP_15]], align 1
+; CHECK-NEXT:    store i8 [[S_14]], ptr [[GEP_14]], align 2
+; CHECK-NEXT:    [[GEP_15:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i8 15
+; CHECK-NEXT:    [[L_15:%.*]] = load i8, ptr [[GEP_15]], align 1
 ; CHECK-NEXT:    [[CMP_15:%.*]] = icmp ugt i8 [[L_15]], -1
 ; CHECK-NEXT:    [[S_15:%.*]] = select i1 [[CMP_15]], i8 [[L_15]], i8 [[X]]
-; CHECK-NEXT:    store i8 [[S_15]], i8* [[GEP_15]], align 2
+; CHECK-NEXT:    store i8 [[S_15]], ptr [[GEP_15]], align 2
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i8, i8* %ptr
+  %l.0 = load i8, ptr %ptr
   %cmp.0 = icmp ugt i8 %l.0, 16383
   %s.0 = select i1 %cmp.0, i8 %l.0, i8 %x
-  store i8 %s.0, i8* %ptr, align 2
+  store i8 %s.0, ptr %ptr, align 2
 
-  %gep.1 = getelementptr inbounds i8, i8* %ptr, i8 1
-  %l.1 = load i8, i8* %gep.1
+  %gep.1 = getelementptr inbounds i8, ptr %ptr, i8 1
+  %l.1 = load i8, ptr %gep.1
   %cmp.1 = icmp ugt i8 %l.1, 16383
   %s.1 = select i1 %cmp.1, i8 %l.1, i8 %x
-  store i8 %s.1, i8* %gep.1, align 2
+  store i8 %s.1, ptr %gep.1, align 2
 
-  %gep.2 = getelementptr inbounds i8, i8* %ptr, i8 2
-  %l.2 = load i8, i8* %gep.2
+  %gep.2 = getelementptr inbounds i8, ptr %ptr, i8 2
+  %l.2 = load i8, ptr %gep.2
   %cmp.2 = icmp ugt i8 %l.2, 16383
   %s.2 = select i1 %cmp.2, i8 %l.2, i8 %x
-  store i8 %s.2, i8* %gep.2, align 2
+  store i8 %s.2, ptr %gep.2, align 2
 
-  %gep.3 = getelementptr inbounds i8, i8* %ptr, i8 3
-  %l.3 = load i8, i8* %gep.3
+  %gep.3 = getelementptr inbounds i8, ptr %ptr, i8 3
+  %l.3 = load i8, ptr %gep.3
   %cmp.3 = icmp ugt i8 %l.3, 16383
   %s.3 = select i1 %cmp.3, i8 %l.3, i8 %x
-  store i8 %s.3, i8* %gep.3, align 2
+  store i8 %s.3, ptr %gep.3, align 2
 
-  %gep.4 = getelementptr inbounds i8, i8* %ptr, i8 4
-  %l.4 = load i8, i8* %gep.4
+  %gep.4 = getelementptr inbounds i8, ptr %ptr, i8 4
+  %l.4 = load i8, ptr %gep.4
   %cmp.4 = icmp ugt i8 %l.4, 16383
   %s.4 = select i1 %cmp.4, i8 %l.4, i8 %x
-  store i8 %s.4, i8* %gep.4, align 2
+  store i8 %s.4, ptr %gep.4, align 2
 
-  %gep.5 = getelementptr inbounds i8, i8* %ptr, i8 5
-  %l.5 = load i8, i8* %gep.5
+  %gep.5 = getelementptr inbounds i8, ptr %ptr, i8 5
+  %l.5 = load i8, ptr %gep.5
   %cmp.5 = icmp ugt i8 %l.5, 16383
   %s.5 = select i1 %cmp.5, i8 %l.5, i8 %x
-  store i8 %s.5, i8* %gep.5, align 2
+  store i8 %s.5, ptr %gep.5, align 2
 
-  %gep.6 = getelementptr inbounds i8, i8* %ptr, i8 6
-  %l.6 = load i8, i8* %gep.6
+  %gep.6 = getelementptr inbounds i8, ptr %ptr, i8 6
+  %l.6 = load i8, ptr %gep.6
   %cmp.6 = icmp ugt i8 %l.6, 16383
   %s.6 = select i1 %cmp.6, i8 %l.6, i8 %x
-  store i8 %s.6, i8* %gep.6, align 2
+  store i8 %s.6, ptr %gep.6, align 2
 
-  %gep.7 = getelementptr inbounds i8, i8* %ptr, i8 7
-  %l.7 = load i8, i8* %gep.7
+  %gep.7 = getelementptr inbounds i8, ptr %ptr, i8 7
+  %l.7 = load i8, ptr %gep.7
   %cmp.7 = icmp ugt i8 %l.7, 16383
   %s.7 = select i1 %cmp.7, i8 %l.7, i8 %x
-  store i8 %s.7, i8* %gep.7, align 2
+  store i8 %s.7, ptr %gep.7, align 2
 
-  %gep.8 = getelementptr inbounds i8, i8* %ptr, i8 8
-  %l.8 = load i8, i8* %gep.8
+  %gep.8 = getelementptr inbounds i8, ptr %ptr, i8 8
+  %l.8 = load i8, ptr %gep.8
   %cmp.8 = icmp ugt i8 %l.8, 16383
   %s.8 = select i1 %cmp.8, i8 %l.0, i8 %x
-  store i8 %s.0, i8* %gep.8, align 2
+  store i8 %s.0, ptr %gep.8, align 2
 
-  %gep.9 = getelementptr inbounds i8, i8* %ptr, i8 9
-  %l.9 = load i8, i8* %gep.9
+  %gep.9 = getelementptr inbounds i8, ptr %ptr, i8 9
+  %l.9 = load i8, ptr %gep.9
   %cmp.9 = icmp ugt i8 %l.9, 16383
   %s.9 = select i1 %cmp.9, i8 %l.9, i8 %x
-  store i8 %s.9, i8* %gep.9, align 2
+  store i8 %s.9, ptr %gep.9, align 2
 
-  %gep.10 = getelementptr inbounds i8, i8* %ptr, i8 10
-  %l.10 = load i8, i8* %gep.10
+  %gep.10 = getelementptr inbounds i8, ptr %ptr, i8 10
+  %l.10 = load i8, ptr %gep.10
   %cmp.10 = icmp ugt i8 %l.10, 16383
   %s.10 = select i1 %cmp.10, i8 %l.10, i8 %x
-  store i8 %s.10, i8* %gep.10, align 2
+  store i8 %s.10, ptr %gep.10, align 2
 
-  %gep.11 = getelementptr inbounds i8, i8* %ptr, i8 11
-  %l.11 = load i8, i8* %gep.11
+  %gep.11 = getelementptr inbounds i8, ptr %ptr, i8 11
+  %l.11 = load i8, ptr %gep.11
   %cmp.11 = icmp ugt i8 %l.11, 16383
   %s.11 = select i1 %cmp.11, i8 %l.11, i8 %x
-  store i8 %s.11, i8* %gep.11, align 2
+  store i8 %s.11, ptr %gep.11, align 2
 
-  %gep.12 = getelementptr inbounds i8, i8* %ptr, i8 12
-  %l.12 = load i8, i8* %gep.12
+  %gep.12 = getelementptr inbounds i8, ptr %ptr, i8 12
+  %l.12 = load i8, ptr %gep.12
   %cmp.12 = icmp ugt i8 %l.12, 16383
   %s.12 = select i1 %cmp.12, i8 %l.12, i8 %x
-  store i8 %s.12, i8* %gep.12, align 2
+  store i8 %s.12, ptr %gep.12, align 2
 
-  %gep.13 = getelementptr inbounds i8, i8* %ptr, i8 13
-  %l.13 = load i8, i8* %gep.13
+  %gep.13 = getelementptr inbounds i8, ptr %ptr, i8 13
+  %l.13 = load i8, ptr %gep.13
   %cmp.13 = icmp ugt i8 %l.13, 16383
   %s.13 = select i1 %cmp.13, i8 %l.13, i8 %x
-  store i8 %s.13, i8* %gep.13, align 2
+  store i8 %s.13, ptr %gep.13, align 2
 
-  %gep.14 = getelementptr inbounds i8, i8* %ptr, i8 14
-  %l.14 = load i8, i8* %gep.14
+  %gep.14 = getelementptr inbounds i8, ptr %ptr, i8 14
+  %l.14 = load i8, ptr %gep.14
   %cmp.14 = icmp ugt i8 %l.14, 16383
   %s.14 = select i1 %cmp.14, i8 %l.14, i8 %x
-  store i8 %s.14, i8* %gep.14, align 2
+  store i8 %s.14, ptr %gep.14, align 2
 
-  %gep.15 = getelementptr inbounds i8, i8* %ptr, i8 15
-  %l.15 = load i8, i8* %gep.15
+  %gep.15 = getelementptr inbounds i8, ptr %ptr, i8 15
+  %l.15 = load i8, ptr %gep.15
   %cmp.15 = icmp ugt i8 %l.15, 16383
   %s.15 = select i1 %cmp.15, i8 %l.15, i8 %x
-  store i8 %s.15, i8* %gep.15, align 2
+  store i8 %s.15, ptr %gep.15, align 2
 
   ret void
 }
 
 
-define void @select_uniform_ugt_4xi16(i16* %ptr, i16 %x) {
+define void @select_uniform_ugt_4xi16(ptr %ptr, i16 %x) {
 ; CHECK-LABEL: @select_uniform_ugt_4xi16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[PTR:%.*]] to <4 x i16>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[PTR:%.*]], align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <4 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[X:%.*]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP1]], <4 x i16> [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16* [[PTR]] to <4 x i16>*
-; CHECK-NEXT:    store <4 x i16> [[TMP4]], <4 x i16>* [[TMP5]], align 2
+; CHECK-NEXT:    store <4 x i16> [[TMP4]], ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i16, i16* %ptr
+  %l.0 = load i16, ptr %ptr
   %cmp.0 = icmp ugt i16 %l.0, 16383
   %s.0 = select i1 %cmp.0, i16 %l.0, i16 %x
-  store i16 %s.0, i16* %ptr, align 2
+  store i16 %s.0, ptr %ptr, align 2
 
-  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
-  %l.1 = load i16, i16* %gep.1
+  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
+  %l.1 = load i16, ptr %gep.1
   %cmp.1 = icmp ugt i16 %l.1, 16383
   %s.1 = select i1 %cmp.1, i16 %l.1, i16 %x
-  store i16 %s.1, i16* %gep.1, align 2
+  store i16 %s.1, ptr %gep.1, align 2
 
-  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
-  %l.2 = load i16, i16* %gep.2
+  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
+  %l.2 = load i16, ptr %gep.2
   %cmp.2 = icmp ugt i16 %l.2, 16383
   %s.2 = select i1 %cmp.2, i16 %l.2, i16 %x
-  store i16 %s.2, i16* %gep.2, align 2
+  store i16 %s.2, ptr %gep.2, align 2
 
-  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
-  %l.3 = load i16, i16* %gep.3
+  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
+  %l.3 = load i16, ptr %gep.3
   %cmp.3 = icmp ugt i16 %l.3, 16383
   %s.3 = select i1 %cmp.3, i16 %l.3, i16 %x
-  store i16 %s.3, i16* %gep.3, align 2
+  store i16 %s.3, ptr %gep.3, align 2
 
   ret void
 }
 
-define void @select_uniform_ult_8xi16(i16* %ptr, i16 %x) {
+define void @select_uniform_ult_8xi16(ptr %ptr, i16 %x) {
 ; CHECK-LABEL: @select_uniform_ult_8xi16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[PTR:%.*]] to <8 x i16>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[X:%.*]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT:    store <8 x i16> [[TMP4]], <8 x i16>* [[TMP5]], align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP4]], ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i16, i16* %ptr
+  %l.0 = load i16, ptr %ptr
   %cmp.0 = icmp ult i16 %l.0, 16383
   %s.0 = select i1 %cmp.0, i16 %l.0, i16 %x
-  store i16 %s.0, i16* %ptr, align 2
+  store i16 %s.0, ptr %ptr, align 2
 
-  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
-  %l.1 = load i16, i16* %gep.1
+  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
+  %l.1 = load i16, ptr %gep.1
   %cmp.1 = icmp ult i16 %l.1, 16383
   %s.1 = select i1 %cmp.1, i16 %l.1, i16 %x
-  store i16 %s.1, i16* %gep.1, align 2
+  store i16 %s.1, ptr %gep.1, align 2
 
-  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
-  %l.2 = load i16, i16* %gep.2
+  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
+  %l.2 = load i16, ptr %gep.2
   %cmp.2 = icmp ult i16 %l.2, 16383
   %s.2 = select i1 %cmp.2, i16 %l.2, i16 %x
-  store i16 %s.2, i16* %gep.2, align 2
+  store i16 %s.2, ptr %gep.2, align 2
 
-  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
-  %l.3 = load i16, i16* %gep.3
+  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
+  %l.3 = load i16, ptr %gep.3
   %cmp.3 = icmp ult i16 %l.3, 16383
   %s.3 = select i1 %cmp.3, i16 %l.3, i16 %x
-  store i16 %s.3, i16* %gep.3, align 2
+  store i16 %s.3, ptr %gep.3, align 2
 
-  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
-  %l.4 = load i16, i16* %gep.4
+  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
+  %l.4 = load i16, ptr %gep.4
   %cmp.4 = icmp ult i16 %l.4, 16383
   %s.4 = select i1 %cmp.4, i16 %l.4, i16 %x
-  store i16 %s.4, i16* %gep.4, align 2
+  store i16 %s.4, ptr %gep.4, align 2
 
-  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
-  %l.5 = load i16, i16* %gep.5
+  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
+  %l.5 = load i16, ptr %gep.5
   %cmp.5 = icmp ult i16 %l.5, 16383
   %s.5 = select i1 %cmp.5, i16 %l.5, i16 %x
-  store i16 %s.5, i16* %gep.5, align 2
+  store i16 %s.5, ptr %gep.5, align 2
 
-  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
-  %l.6 = load i16, i16* %gep.6
+  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
+  %l.6 = load i16, ptr %gep.6
   %cmp.6 = icmp ult i16 %l.6, 16383
   %s.6 = select i1 %cmp.6, i16 %l.6, i16 %x
-  store i16 %s.6, i16* %gep.6, align 2
+  store i16 %s.6, ptr %gep.6, align 2
 
-  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
-  %l.7 = load i16, i16* %gep.7
+  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
+  %l.7 = load i16, ptr %gep.7
   %cmp.7 = icmp ult i16 %l.7, 16383
   %s.7 = select i1 %cmp.7, i16 %l.7, i16 %x
-  store i16 %s.7, i16* %gep.7, align 2
+  store i16 %s.7, ptr %gep.7, align 2
   ret void
 }
 
-define void @select_uniform_eq_2xi32(i32* %ptr, i32 %x) {
+define void @select_uniform_eq_2xi32(ptr %ptr, i32 %x) {
 ; CHECK-LABEL: @select_uniform_eq_2xi32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR:%.*]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 16383, i32 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[PTR]] to <2 x i32>*
-; CHECK-NEXT:    store <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]], align 2
+; CHECK-NEXT:    store <2 x i32> [[TMP4]], ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i32, i32* %ptr
+  %l.0 = load i32, ptr %ptr
   %cmp.0 = icmp eq i32 %l.0, 16383
   %s.0 = select i1 %cmp.0, i32 %l.0, i32 %x
-  store i32 %s.0, i32* %ptr, align 2
+  store i32 %s.0, ptr %ptr, align 2
 
-  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
-  %l.1 = load i32, i32* %gep.1
+  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
+  %l.1 = load i32, ptr %gep.1
   %cmp.1 = icmp eq i32 %l.1, 16383
   %s.1 = select i1 %cmp.1, i32 %l.1, i32 %x
-  store i32 %s.1, i32* %gep.1, align 2
+  store i32 %s.1, ptr %gep.1, align 2
 
   ret void
 }
 
-define void @select_uniform_eq_4xi32(i32* %ptr, i32 %x) {
+define void @select_uniform_eq_4xi32(ptr %ptr, i32 %x) {
 ; CHECK-LABEL: @select_uniform_eq_4xi32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 2
+; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i32, i32* %ptr
+  %l.0 = load i32, ptr %ptr
   %cmp.0 = icmp eq i32 %l.0, 16383
   %s.0 = select i1 %cmp.0, i32 %l.0, i32 %x
-  store i32 %s.0, i32* %ptr, align 2
+  store i32 %s.0, ptr %ptr, align 2
 
-  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
-  %l.1 = load i32, i32* %gep.1
+  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
+  %l.1 = load i32, ptr %gep.1
   %cmp.1 = icmp eq i32 %l.1, 16383
   %s.1 = select i1 %cmp.1, i32 %l.1, i32 %x
-  store i32 %s.1, i32* %gep.1, align 2
+  store i32 %s.1, ptr %gep.1, align 2
 
-  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
-  %l.2 = load i32, i32* %gep.2
+  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
+  %l.2 = load i32, ptr %gep.2
   %cmp.2 = icmp eq i32 %l.2, 16383
   %s.2 = select i1 %cmp.2, i32 %l.2, i32 %x
-  store i32 %s.2, i32* %gep.2, align 2
+  store i32 %s.2, ptr %gep.2, align 2
 
-  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
-  %l.3 = load i32, i32* %gep.3
+  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
+  %l.3 = load i32, ptr %gep.3
   %cmp.3 = icmp eq i32 %l.3, 16383
   %s.3 = select i1 %cmp.3, i32 %l.3, i32 %x
-  store i32 %s.3, i32* %gep.3, align 2
+  store i32 %s.3, ptr %gep.3, align 2
   ret void
 }
 
-define void @select_uniform_ne_2xi64(i64* %ptr, i64 %x) {
+define void @select_uniform_ne_2xi64(ptr %ptr, i64 %x) {
 ; CHECK-LABEL: @select_uniform_ne_2xi64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[PTR:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i64> [[TMP1]], <i64 16383, i64 16383>
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP4]], <2 x i64>* [[TMP5]], align 2
+; CHECK-NEXT:    store <2 x i64> [[TMP4]], ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l.0 = load i64, i64* %ptr
+  %l.0 = load i64, ptr %ptr
   %cmp.0 = icmp ne i64 %l.0, 16383
   %s.0 = select i1 %cmp.0, i64 %l.0, i64 %x
-  store i64 %s.0, i64* %ptr, align 2
+  store i64 %s.0, ptr %ptr, align 2
 
-  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
-  %l.1 = load i64, i64* %gep.1
+  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
+  %l.1 = load i64, ptr %gep.1
   %cmp.1 = icmp ne i64 %l.1, 16383
   %s.1 = select i1 %cmp.1, i64 %l.1, i64 %x
-  store i64 %s.1, i64* %gep.1, align 2
+  store i64 %s.1, ptr %gep.1, align 2
 
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
index 2b792da557188..1cdac15dba943 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
@@ -8,11 +8,11 @@ declare void @use(double)
 
 ; The extracts %v1.lane.0 and %v1.lane.1 should be considered free during SLP,
 ; because they will be directly in a vector register on AArch64.
-define void @noop_extracts_first_2_lanes(<2 x double>* %ptr.1, <4 x double>* %ptr.2) {
+define void @noop_extracts_first_2_lanes(ptr %ptr.1, ptr %ptr.2) {
 ; CHECK-LABEL: @noop_extracts_first_2_lanes(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[V_1:%.*]] = load <2 x double>, <2 x double>* [[PTR_1:%.*]], align 8
-; CHECK-NEXT:    [[V_2:%.*]] = load <4 x double>, <4 x double>* [[PTR_2:%.*]], align 16
+; CHECK-NEXT:    [[V_1:%.*]] = load <2 x double>, ptr [[PTR_1:%.*]], align 8
+; CHECK-NEXT:    [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16
 ; CHECK-NEXT:    [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2
 ; CHECK-NEXT:    [[V2_LANE_3:%.*]] = extractelement <4 x double> [[V_2]], i32 3
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0
@@ -22,15 +22,15 @@ define void @noop_extracts_first_2_lanes(<2 x double>* %ptr.1, <4 x double>* %pt
 ; CHECK-NEXT:    call void @use(double [[TMP3]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[V_1]], i32 1
 ; CHECK-NEXT:    call void @use(double [[TMP4]])
-; CHECK-NEXT:    store <2 x double> [[TMP2]], <2 x double>* [[PTR_1]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr [[PTR_1]], align 8
 ; CHECK-NEXT:    ret void
 ;
 bb:
-  %v.1 = load <2 x double>, <2 x double>* %ptr.1, align 8
+  %v.1 = load <2 x double>, ptr %ptr.1, align 8
   %v1.lane.0 = extractelement <2 x double> %v.1, i32 0
   %v1.lane.1 = extractelement <2 x double> %v.1, i32 1
 
-  %v.2 = load <4 x double>, <4 x double>* %ptr.2, align 16
+  %v.2 = load <4 x double>, ptr %ptr.2, align 16
   %v2.lane.2 = extractelement <4 x double> %v.2, i32 2
   %v2.lane.3 = extractelement <4 x double> %v.2, i32 3
 
@@ -43,19 +43,19 @@ bb:
   call void @use(double %v1.lane.0)
   call void @use(double %v1.lane.1)
 
-  store <2 x double> %a.ins.1, <2 x double>* %ptr.1, align 8
+  store <2 x double> %a.ins.1, ptr %ptr.1, align 8
   ret void
 }
 
 ; Extracts of consecutive indices, but different vector operand.
-define void @extracts_first_2_lanes_different_vectors(<2 x double>* %ptr.1, <4 x double>* %ptr.2, <2 x double>* %ptr.3) {
+define void @extracts_first_2_lanes_different_vectors(ptr %ptr.1, ptr %ptr.2, ptr %ptr.3) {
 ; CHECK-LABEL: @extracts_first_2_lanes_different_vectors(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[V_1:%.*]] = load <2 x double>, <2 x double>* [[PTR_1:%.*]], align 8
+; CHECK-NEXT:    [[V_1:%.*]] = load <2 x double>, ptr [[PTR_1:%.*]], align 8
 ; CHECK-NEXT:    [[V1_LANE_0:%.*]] = extractelement <2 x double> [[V_1]], i32 0
-; CHECK-NEXT:    [[V_3:%.*]] = load <2 x double>, <2 x double>* [[PTR_3:%.*]], align 8
+; CHECK-NEXT:    [[V_3:%.*]] = load <2 x double>, ptr [[PTR_3:%.*]], align 8
 ; CHECK-NEXT:    [[V3_LANE_1:%.*]] = extractelement <2 x double> [[V_3]], i32 1
-; CHECK-NEXT:    [[V_2:%.*]] = load <4 x double>, <4 x double>* [[PTR_2:%.*]], align 16
+; CHECK-NEXT:    [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16
 ; CHECK-NEXT:    [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V1_LANE_0]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V3_LANE_1]], i32 1
@@ -64,16 +64,16 @@ define void @extracts_first_2_lanes_different_vectors(<2 x double>* %ptr.1, <4 x
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
 ; CHECK-NEXT:    call void @use(double [[V1_LANE_0]])
 ; CHECK-NEXT:    call void @use(double [[V3_LANE_1]])
-; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[PTR_1]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[PTR_1]], align 8
 ; CHECK-NEXT:    ret void
 ;
 bb:
-  %v.1 = load <2 x double>, <2 x double>* %ptr.1, align 8
+  %v.1 = load <2 x double>, ptr %ptr.1, align 8
   %v1.lane.0 = extractelement <2 x double> %v.1, i32 0
-  %v.3 = load <2 x double>, <2 x double>* %ptr.3, align 8
+  %v.3 = load <2 x double>, ptr %ptr.3, align 8
   %v3.lane.1 = extractelement <2 x double> %v.3, i32 1
 
-  %v.2 = load <4 x double>, <4 x double>* %ptr.2, align 16
+  %v.2 = load <4 x double>, ptr %ptr.2, align 16
   %v2.lane.2 = extractelement <4 x double> %v.2, i32 2
 
   %a.lane.0 = fmul double %v1.lane.0, %v2.lane.2
@@ -85,19 +85,19 @@ bb:
   call void @use(double %v1.lane.0)
   call void @use(double %v3.lane.1)
 
-  store <2 x double> %a.ins.1, <2 x double>* %ptr.1, align 8
+  store <2 x double> %a.ins.1, ptr %ptr.1, align 8
   ret void
 }
 
 ; The extracts %v1.lane.2 and %v1.lane.3 should be considered free during SLP,
 ; because they will be directly in a vector register on AArch64.
-define void @noop_extract_second_2_lanes(<4 x double>* %ptr.1, <4 x double>* %ptr.2) {
+define void @noop_extract_second_2_lanes(ptr %ptr.1, ptr %ptr.2) {
 ; CHECK-LABEL: @noop_extract_second_2_lanes(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[V_1:%.*]] = load <4 x double>, <4 x double>* [[PTR_1:%.*]], align 8
+; CHECK-NEXT:    [[V_1:%.*]] = load <4 x double>, ptr [[PTR_1:%.*]], align 8
 ; CHECK-NEXT:    [[V1_LANE_2:%.*]] = extractelement <4 x double> [[V_1]], i32 2
 ; CHECK-NEXT:    [[V1_LANE_3:%.*]] = extractelement <4 x double> [[V_1]], i32 3
-; CHECK-NEXT:    [[V_2:%.*]] = load <4 x double>, <4 x double>* [[PTR_2:%.*]], align 16
+; CHECK-NEXT:    [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16
 ; CHECK-NEXT:    [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V1_LANE_2]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V1_LANE_3]], i32 1
@@ -107,15 +107,15 @@ define void @noop_extract_second_2_lanes(<4 x double>* %ptr.1, <4 x double>* %pt
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    call void @use(double [[V1_LANE_2]])
 ; CHECK-NEXT:    call void @use(double [[V1_LANE_3]])
-; CHECK-NEXT:    store <4 x double> [[TMP4]], <4 x double>* [[PTR_1]], align 8
+; CHECK-NEXT:    store <4 x double> [[TMP4]], ptr [[PTR_1]], align 8
 ; CHECK-NEXT:    ret void
 ;
 bb:
-  %v.1 = load <4 x double>, <4 x double>* %ptr.1, align 8
+  %v.1 = load <4 x double>, ptr %ptr.1, align 8
   %v1.lane.2 = extractelement <4 x double> %v.1, i32 2
   %v1.lane.3 = extractelement <4 x double> %v.1, i32 3
 
-  %v.2 = load <4 x double>, <4 x double>* %ptr.2, align 16
+  %v.2 = load <4 x double>, ptr %ptr.2, align 16
   %v2.lane.2 = extractelement <4 x double> %v.2, i32 2
 
   %a.lane.0 = fmul double %v1.lane.2, %v2.lane.2
@@ -126,17 +126,17 @@ bb:
 
   call void @use(double %v1.lane.2)
   call void @use(double %v1.lane.3)
-  store <4 x double> %a.ins.1, <4 x double>* %ptr.1, align 8
+  store <4 x double> %a.ins.1, ptr %ptr.1, align 8
   ret void
 }
 
 ; %v1.lane.0 and %v1.lane.1 are used in reverse-order, so they won't be
 ; directly in a vector register on AArch64.
-define void @extract_reverse_order(<2 x double>* %ptr.1, <4 x double>* %ptr.2) {
+define void @extract_reverse_order(ptr %ptr.1, ptr %ptr.2) {
 ; CHECK-LABEL: @extract_reverse_order(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[V_1:%.*]] = load <2 x double>, <2 x double>* [[PTR_1:%.*]], align 8
-; CHECK-NEXT:    [[V_2:%.*]] = load <4 x double>, <4 x double>* [[PTR_2:%.*]], align 16
+; CHECK-NEXT:    [[V_1:%.*]] = load <2 x double>, ptr [[PTR_1:%.*]], align 8
+; CHECK-NEXT:    [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16
 ; CHECK-NEXT:    [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
@@ -146,15 +146,15 @@ define void @extract_reverse_order(<2 x double>* %ptr.1, <4 x double>* %ptr.2) {
 ; CHECK-NEXT:    call void @use(double [[TMP3]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[V_1]], i32 1
 ; CHECK-NEXT:    call void @use(double [[TMP4]])
-; CHECK-NEXT:    store <2 x double> [[TMP2]], <2 x double>* [[PTR_1]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr [[PTR_1]], align 8
 ; CHECK-NEXT:    ret void
 ;
 bb:
-  %v.1 = load <2 x double>, <2 x double>* %ptr.1, align 8
+  %v.1 = load <2 x double>, ptr %ptr.1, align 8
   %v1.lane.0 = extractelement <2 x double> %v.1, i32 0
   %v1.lane.1 = extractelement <2 x double> %v.1, i32 1
 
-  %v.2 = load <4 x double>, <4 x double>* %ptr.2, align 16
+  %v.2 = load <4 x double>, ptr %ptr.2, align 16
   %v2.lane.2 = extractelement <4 x double> %v.2, i32 2
 
   %a.lane.0 = fmul double %v1.lane.1, %v2.lane.2
@@ -166,18 +166,18 @@ bb:
   call void @use(double %v1.lane.0)
   call void @use(double %v1.lane.1)
 
-  store <2 x double> %a.ins.1, <2 x double>* %ptr.1, align 8
+  store <2 x double> %a.ins.1, ptr %ptr.1, align 8
   ret void
 }
 
 ; %v1.lane.1 and %v1.lane.2 are extracted from different vector registers on AArch64.
-define void @extract_lanes_1_and_2(<4 x double>* %ptr.1, <4 x double>* %ptr.2) {
+define void @extract_lanes_1_and_2(ptr %ptr.1, ptr %ptr.2) {
 ; CHECK-LABEL: @extract_lanes_1_and_2(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[V_1:%.*]] = load <4 x double>, <4 x double>* [[PTR_1:%.*]], align 8
+; CHECK-NEXT:    [[V_1:%.*]] = load <4 x double>, ptr [[PTR_1:%.*]], align 8
 ; CHECK-NEXT:    [[V1_LANE_1:%.*]] = extractelement <4 x double> [[V_1]], i32 1
 ; CHECK-NEXT:    [[V1_LANE_2:%.*]] = extractelement <4 x double> [[V_1]], i32 2
-; CHECK-NEXT:    [[V_2:%.*]] = load <4 x double>, <4 x double>* [[PTR_2:%.*]], align 16
+; CHECK-NEXT:    [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16
 ; CHECK-NEXT:    [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V1_LANE_1]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V1_LANE_2]], i32 1
@@ -187,15 +187,15 @@ define void @extract_lanes_1_and_2(<4 x double>* %ptr.1, <4 x double>* %ptr.2) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    call void @use(double [[V1_LANE_1]])
 ; CHECK-NEXT:    call void @use(double [[V1_LANE_2]])
-; CHECK-NEXT:    store <4 x double> [[TMP4]], <4 x double>* [[PTR_1]], align 8
+; CHECK-NEXT:    store <4 x double> [[TMP4]], ptr [[PTR_1]], align 8
 ; CHECK-NEXT:    ret void
 ;
 bb:
-  %v.1 = load <4 x double>, <4 x double>* %ptr.1, align 8
+  %v.1 = load <4 x double>, ptr %ptr.1, align 8
   %v1.lane.1 = extractelement <4 x double> %v.1, i32 1
   %v1.lane.2 = extractelement <4 x double> %v.1, i32 2
 
-  %v.2 = load <4 x double>, <4 x double>* %ptr.2, align 16
+  %v.2 = load <4 x double>, ptr %ptr.2, align 16
   %v2.lane.2 = extractelement <4 x double> %v.2, i32 2
 
   %a.lane.0 = fmul double %v1.lane.1, %v2.lane.2
@@ -207,22 +207,22 @@ bb:
   call void @use(double %v1.lane.1)
   call void @use(double %v1.lane.2)
 
-  store <4 x double> %a.ins.1, <4 x double>* %ptr.1, align 8
+  store <4 x double> %a.ins.1, ptr %ptr.1, align 8
   ret void
 }
 
 ; More complex case where the extracted lanes are directly from a vector
 ; register on AArch64 and should be considered free, because we can
 ; directly use the source vector register.
-define void @noop_extracts_existing_vector_4_lanes(<9 x double>* %ptr.1, <4 x double>* %ptr.2) {
+define void @noop_extracts_existing_vector_4_lanes(ptr %ptr.1, ptr %ptr.2) {
 ; CHECK-LABEL: @noop_extracts_existing_vector_4_lanes(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[V_1:%.*]] = load <9 x double>, <9 x double>* [[PTR_1:%.*]], align 8
+; CHECK-NEXT:    [[V_1:%.*]] = load <9 x double>, ptr [[PTR_1:%.*]], align 8
 ; CHECK-NEXT:    [[V1_LANE_0:%.*]] = extractelement <9 x double> [[V_1]], i32 0
 ; CHECK-NEXT:    [[V1_LANE_1:%.*]] = extractelement <9 x double> [[V_1]], i32 1
 ; CHECK-NEXT:    [[V1_LANE_2:%.*]] = extractelement <9 x double> [[V_1]], i32 2
 ; CHECK-NEXT:    [[V1_LANE_3:%.*]] = extractelement <9 x double> [[V_1]], i32 3
-; CHECK-NEXT:    [[V_2:%.*]] = load <4 x double>, <4 x double>* [[PTR_2:%.*]], align 16
+; CHECK-NEXT:    [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16
 ; CHECK-NEXT:    [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0
 ; CHECK-NEXT:    [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1
 ; CHECK-NEXT:    [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2
@@ -239,16 +239,16 @@ define void @noop_extracts_existing_vector_4_lanes(<9 x double>* %ptr.1, <4 x do
 ; CHECK-NEXT:    call void @use(double [[V1_LANE_1]])
 ; CHECK-NEXT:    call void @use(double [[V1_LANE_2]])
 ; CHECK-NEXT:    call void @use(double [[V1_LANE_3]])
-; CHECK-NEXT:    store <9 x double> [[TMP7]], <9 x double>* [[PTR_1]], align 8
+; CHECK-NEXT:    store <9 x double> [[TMP7]], ptr [[PTR_1]], align 8
 ; CHECK-NEXT:    ret void
 ;
 bb:
-  %v.1 = load <9 x double>, <9 x double>* %ptr.1, align 8
+  %v.1 = load <9 x double>, ptr %ptr.1, align 8
   %v1.lane.0 = extractelement <9 x double> %v.1, i32 0
   %v1.lane.1 = extractelement <9 x double> %v.1, i32 1
   %v1.lane.2 = extractelement <9 x double> %v.1, i32 2
   %v1.lane.3 = extractelement <9 x double> %v.1, i32 3
-  %v.2 = load <4 x double>, <4 x double>* %ptr.2, align 16
+  %v.2 = load <4 x double>, ptr %ptr.2, align 16
   %v2.lane.0 = extractelement <4 x double> %v.2, i32 0
   %v2.lane.1 = extractelement <4 x double> %v.2, i32 1
   %v2.lane.2 = extractelement <4 x double> %v.2, i32 2
@@ -264,21 +264,21 @@ bb:
   call void @use(double %v1.lane.1)
   call void @use(double %v1.lane.2)
   call void @use(double %v1.lane.3)
-  store <9 x double> %a.ins.3, <9 x double>* %ptr.1, align 8
+  store <9 x double> %a.ins.3, ptr %ptr.1, align 8
   ret void
 }
 
 ; Extracted lanes are not used in the right order, so we cannot reuse the
 ; source vector registers directly.
-define void @extracts_jumbled_4_lanes(<9 x double>* %ptr.1, <4 x double>* %ptr.2) {
+define void @extracts_jumbled_4_lanes(ptr %ptr.1, ptr %ptr.2) {
 ; CHECK-LABEL: @extracts_jumbled_4_lanes(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[V_1:%.*]] = load <9 x double>, <9 x double>* [[PTR_1:%.*]], align 8
+; CHECK-NEXT:    [[V_1:%.*]] = load <9 x double>, ptr [[PTR_1:%.*]], align 8
 ; CHECK-NEXT:    [[V1_LANE_0:%.*]] = extractelement <9 x double> [[V_1]], i32 0
 ; CHECK-NEXT:    [[V1_LANE_1:%.*]] = extractelement <9 x double> [[V_1]], i32 1
 ; CHECK-NEXT:    [[V1_LANE_2:%.*]] = extractelement <9 x double> [[V_1]], i32 2
 ; CHECK-NEXT:    [[V1_LANE_3:%.*]] = extractelement <9 x double> [[V_1]], i32 3
-; CHECK-NEXT:    [[V_2:%.*]] = load <4 x double>, <4 x double>* [[PTR_2:%.*]], align 16
+; CHECK-NEXT:    [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16
 ; CHECK-NEXT:    [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0
 ; CHECK-NEXT:    [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1
 ; CHECK-NEXT:    [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2
@@ -296,16 +296,16 @@ define void @extracts_jumbled_4_lanes(<9 x double>* %ptr.1, <4 x double>* %ptr.2
 ; CHECK-NEXT:    call void @use(double [[V1_LANE_1]])
 ; CHECK-NEXT:    call void @use(double [[V1_LANE_2]])
 ; CHECK-NEXT:    call void @use(double [[V1_LANE_3]])
-; CHECK-NEXT:    store <9 x double> [[TMP8]], <9 x double>* [[PTR_1]], align 8
+; CHECK-NEXT:    store <9 x double> [[TMP8]], ptr [[PTR_1]], align 8
 ; CHECK-NEXT:    ret void
 ;
 bb:
-  %v.1 = load <9 x double>, <9 x double>* %ptr.1, align 8
+  %v.1 = load <9 x double>, ptr %ptr.1, align 8
   %v1.lane.0 = extractelement <9 x double> %v.1, i32 0
   %v1.lane.1 = extractelement <9 x double> %v.1, i32 1
   %v1.lane.2 = extractelement <9 x double> %v.1, i32 2
   %v1.lane.3 = extractelement <9 x double> %v.1, i32 3
-  %v.2 = load <4 x double>, <4 x double>* %ptr.2, align 16
+  %v.2 = load <4 x double>, ptr %ptr.2, align 16
   %v2.lane.0 = extractelement <4 x double> %v.2, i32 0
   %v2.lane.1 = extractelement <4 x double> %v.2, i32 1
   %v2.lane.2 = extractelement <4 x double> %v.2, i32 2
@@ -321,7 +321,7 @@ bb:
   call void @use(double %v1.lane.1)
   call void @use(double %v1.lane.2)
   call void @use(double %v1.lane.3)
-  store <9 x double> %a.ins.3, <9 x double>* %ptr.1, align 8
+  store <9 x double> %a.ins.3, ptr %ptr.1, align 8
   ret void
 }
 
@@ -329,10 +329,10 @@ bb:
 ; Even more complex case where the extracted lanes are directly from a vector
 ; register on AArch64 and should be considered free, because we can
 ; directly use the source vector register.
-define void @noop_extracts_9_lanes(<9 x double>* %ptr.1, <4 x double>* %ptr.2) {
+define void @noop_extracts_9_lanes(ptr %ptr.1, ptr %ptr.2) {
 ; CHECK-LABEL: @noop_extracts_9_lanes(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[V_1:%.*]] = load <9 x double>, <9 x double>* [[PTR_1:%.*]], align 8
+; CHECK-NEXT:    [[V_1:%.*]] = load <9 x double>, ptr [[PTR_1:%.*]], align 8
 ; CHECK-NEXT:    [[V1_LANE_0:%.*]] = extractelement <9 x double> [[V_1]], i32 0
 ; CHECK-NEXT:    [[V1_LANE_1:%.*]] = extractelement <9 x double> [[V_1]], i32 1
 ; CHECK-NEXT:    [[V1_LANE_2:%.*]] = extractelement <9 x double> [[V_1]], i32 2
@@ -342,7 +342,7 @@ define void @noop_extracts_9_lanes(<9 x double>* %ptr.1, <4 x double>* %ptr.2) {
 ; CHECK-NEXT:    [[V1_LANE_6:%.*]] = extractelement <9 x double> [[V_1]], i32 6
 ; CHECK-NEXT:    [[V1_LANE_7:%.*]] = extractelement <9 x double> [[V_1]], i32 7
 ; CHECK-NEXT:    [[V1_LANE_8:%.*]] = extractelement <9 x double> [[V_1]], i32 8
-; CHECK-NEXT:    [[V_2:%.*]] = load <4 x double>, <4 x double>* [[PTR_2:%.*]], align 16
+; CHECK-NEXT:    [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16
 ; CHECK-NEXT:    [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0
 ; CHECK-NEXT:    [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1
 ; CHECK-NEXT:    [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2
@@ -379,11 +379,11 @@ define void @noop_extracts_9_lanes(<9 x double>* %ptr.1, <4 x double>* %ptr.2) {
 ; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <8 x double> [[TMP24]], <8 x double> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
 ; CHECK-NEXT:    [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP25]], double [[B_LANE_8]], i32 8
 ; CHECK-NEXT:    [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]]
-; CHECK-NEXT:    store <9 x double> [[RES]], <9 x double>* [[PTR_1]], align 8
+; CHECK-NEXT:    store <9 x double> [[RES]], ptr [[PTR_1]], align 8
 ; CHECK-NEXT:    ret void
 ;
 bb:
-  %v.1 = load <9 x double>, <9 x double>* %ptr.1, align 8
+  %v.1 = load <9 x double>, ptr %ptr.1, align 8
   %v1.lane.0 = extractelement <9 x double> %v.1, i32 0
   %v1.lane.1 = extractelement <9 x double> %v.1, i32 1
   %v1.lane.2 = extractelement <9 x double> %v.1, i32 2
@@ -394,7 +394,7 @@ bb:
   %v1.lane.7 = extractelement <9 x double> %v.1, i32 7
   %v1.lane.8 = extractelement <9 x double> %v.1, i32 8
 
-  %v.2 = load <4 x double>, <4 x double>* %ptr.2, align 16
+  %v.2 = load <4 x double>, ptr %ptr.2, align 16
   %v2.lane.0 = extractelement <4 x double> %v.2, i32 0
   %v2.lane.1 = extractelement <4 x double> %v.2, i32 1
   %v2.lane.2 = extractelement <4 x double> %v.2, i32 2
@@ -440,16 +440,16 @@ bb:
   %b.ins.8 = insertelement <9 x double> %b.ins.7, double %b.lane.8, i32 8
 
   %res = fsub <9 x double> %a.ins.8, %b.ins.8
-  store <9 x double> %res, <9 x double>* %ptr.1, align 8
+  store <9 x double> %res, ptr %ptr.1, align 8
   ret void
 }
 
 ; Extracted lanes used in first fmul chain are not used in the right order, so
 ; we cannot reuse the source vector registers directly.
-define void @first_mul_chain_jumbled(<9 x double>* %ptr.1, <4 x double>* %ptr.2) {
+define void @first_mul_chain_jumbled(ptr %ptr.1, ptr %ptr.2) {
 ; CHECK-LABEL: @first_mul_chain_jumbled(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[V_1:%.*]] = load <9 x double>, <9 x double>* [[PTR_1:%.*]], align 8
+; CHECK-NEXT:    [[V_1:%.*]] = load <9 x double>, ptr [[PTR_1:%.*]], align 8
 ; CHECK-NEXT:    [[V1_LANE_0:%.*]] = extractelement <9 x double> [[V_1]], i32 0
 ; CHECK-NEXT:    [[V1_LANE_1:%.*]] = extractelement <9 x double> [[V_1]], i32 1
 ; CHECK-NEXT:    [[V1_LANE_2:%.*]] = extractelement <9 x double> [[V_1]], i32 2
@@ -459,7 +459,7 @@ define void @first_mul_chain_jumbled(<9 x double>* %ptr.1, <4 x double>* %ptr.2)
 ; CHECK-NEXT:    [[V1_LANE_6:%.*]] = extractelement <9 x double> [[V_1]], i32 6
 ; CHECK-NEXT:    [[V1_LANE_7:%.*]] = extractelement <9 x double> [[V_1]], i32 7
 ; CHECK-NEXT:    [[V1_LANE_8:%.*]] = extractelement <9 x double> [[V_1]], i32 8
-; CHECK-NEXT:    [[V_2:%.*]] = load <4 x double>, <4 x double>* [[PTR_2:%.*]], align 16
+; CHECK-NEXT:    [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16
 ; CHECK-NEXT:    [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0
 ; CHECK-NEXT:    [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1
 ; CHECK-NEXT:    [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2
@@ -492,11 +492,11 @@ define void @first_mul_chain_jumbled(<9 x double>* %ptr.1, <4 x double>* %ptr.2)
 ; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <8 x double> [[TMP21]], <8 x double> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
 ; CHECK-NEXT:    [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP22]], double [[B_LANE_8]], i32 8
 ; CHECK-NEXT:    [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]]
-; CHECK-NEXT:    store <9 x double> [[RES]], <9 x double>* [[PTR_1]], align 8
+; CHECK-NEXT:    store <9 x double> [[RES]], ptr [[PTR_1]], align 8
 ; CHECK-NEXT:    ret void
 ;
 bb:
-  %v.1 = load <9 x double>, <9 x double>* %ptr.1, align 8
+  %v.1 = load <9 x double>, ptr %ptr.1, align 8
   %v1.lane.0 = extractelement <9 x double> %v.1, i32 0
   %v1.lane.1 = extractelement <9 x double> %v.1, i32 1
   %v1.lane.2 = extractelement <9 x double> %v.1, i32 2
@@ -507,7 +507,7 @@ bb:
   %v1.lane.7 = extractelement <9 x double> %v.1, i32 7
   %v1.lane.8 = extractelement <9 x double> %v.1, i32 8
 
-  %v.2 = load <4 x double>, <4 x double>* %ptr.2, align 16
+  %v.2 = load <4 x double>, ptr %ptr.2, align 16
   %v2.lane.0 = extractelement <4 x double> %v.2, i32 0
   %v2.lane.1 = extractelement <4 x double> %v.2, i32 1
   %v2.lane.2 = extractelement <4 x double> %v.2, i32 2
@@ -553,16 +553,16 @@ bb:
   %b.ins.8 = insertelement <9 x double> %b.ins.7, double %b.lane.8, i32 8
 
   %res = fsub <9 x double> %a.ins.8, %b.ins.8
-  store <9 x double> %res, <9 x double>* %ptr.1, align 8
+  store <9 x double> %res, ptr %ptr.1, align 8
   ret void
 }
 
 ; Extracted lanes used in both fmul chains are not used in the right order, so
 ; we cannot reuse the source vector registers directly.
-define void @first_and_second_mul_chain_jumbled(<9 x double>* %ptr.1, <4 x double>* %ptr.2) {
+define void @first_and_second_mul_chain_jumbled(ptr %ptr.1, ptr %ptr.2) {
 ; CHECK-LABEL: @first_and_second_mul_chain_jumbled(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[V_1:%.*]] = load <9 x double>, <9 x double>* [[PTR_1:%.*]], align 8
+; CHECK-NEXT:    [[V_1:%.*]] = load <9 x double>, ptr [[PTR_1:%.*]], align 8
 ; CHECK-NEXT:    [[V1_LANE_0:%.*]] = extractelement <9 x double> [[V_1]], i32 0
 ; CHECK-NEXT:    [[V1_LANE_1:%.*]] = extractelement <9 x double> [[V_1]], i32 1
 ; CHECK-NEXT:    [[V1_LANE_2:%.*]] = extractelement <9 x double> [[V_1]], i32 2
@@ -572,7 +572,7 @@ define void @first_and_second_mul_chain_jumbled(<9 x double>* %ptr.1, <4 x doubl
 ; CHECK-NEXT:    [[V1_LANE_6:%.*]] = extractelement <9 x double> [[V_1]], i32 6
 ; CHECK-NEXT:    [[V1_LANE_7:%.*]] = extractelement <9 x double> [[V_1]], i32 7
 ; CHECK-NEXT:    [[V1_LANE_8:%.*]] = extractelement <9 x double> [[V_1]], i32 8
-; CHECK-NEXT:    [[V_2:%.*]] = load <4 x double>, <4 x double>* [[PTR_2:%.*]], align 16
+; CHECK-NEXT:    [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16
 ; CHECK-NEXT:    [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0
 ; CHECK-NEXT:    [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1
 ; CHECK-NEXT:    [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2
@@ -609,11 +609,11 @@ define void @first_and_second_mul_chain_jumbled(<9 x double>* %ptr.1, <4 x doubl
 ; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <8 x double> [[TMP24]], <8 x double> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
 ; CHECK-NEXT:    [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP25]], double [[B_LANE_8]], i32 8
 ; CHECK-NEXT:    [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]]
-; CHECK-NEXT:    store <9 x double> [[RES]], <9 x double>* [[PTR_1]], align 8
+; CHECK-NEXT:    store <9 x double> [[RES]], ptr [[PTR_1]], align 8
 ; CHECK-NEXT:    ret void
 ;
 bb:
-  %v.1 = load <9 x double>, <9 x double>* %ptr.1, align 8
+  %v.1 = load <9 x double>, ptr %ptr.1, align 8
   %v1.lane.0 = extractelement <9 x double> %v.1, i32 0
   %v1.lane.1 = extractelement <9 x double> %v.1, i32 1
   %v1.lane.2 = extractelement <9 x double> %v.1, i32 2
@@ -624,7 +624,7 @@ bb:
   %v1.lane.7 = extractelement <9 x double> %v.1, i32 7
   %v1.lane.8 = extractelement <9 x double> %v.1, i32 8
 
-  %v.2 = load <4 x double>, <4 x double>* %ptr.2, align 16
+  %v.2 = load <4 x double>, ptr %ptr.2, align 16
   %v2.lane.0 = extractelement <4 x double> %v.2, i32 0
   %v2.lane.1 = extractelement <4 x double> %v.2, i32 1
   %v2.lane.2 = extractelement <4 x double> %v.2, i32 2
@@ -670,6 +670,6 @@ bb:
   %b.ins.8 = insertelement <9 x double> %b.ins.7, double %b.lane.8, i32 8
 
   %res = fsub <9 x double> %a.ins.8, %b.ins.8
-  store <9 x double> %res, <9 x double>* %ptr.1, align 8
+  store <9 x double> %res, ptr %ptr.1, align 8
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll
index 7fa38f9747d64..a122d84629c1a 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll
@@ -8,55 +8,51 @@ target triple = "aarch64"
 ; There are no 'or' operations, so it can't be a bswap or
 ; other pattern that we are expecting the backend to handle.
 
-define void @PR50256(i8* %a, i16* %b, i32 %n) {
+define void @PR50256(ptr %a, ptr %b, i32 %n) {
 ; CHECK-LABEL: @PR50256(
-; CHECK-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 8
-; CHECK-NEXT:    [[ARRAYIDX3_8:%.*]] = getelementptr inbounds i16, i16* [[B:%.*]], i64 8
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[A]] to <8 x i8>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; CHECK-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 8
+; CHECK-NEXT:    [[ARRAYIDX3_8:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i64 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[A]], align 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
 ; CHECK-NEXT:    [[TMP4:%.*]] = shl nuw <8 x i16> [[TMP3]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16* [[B]] to <8 x i16>*
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[ARRAYIDX_8]] to <8 x i8>*
-; CHECK-NEXT:    [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[TMP6]], align 1
+; CHECK-NEXT:    [[TMP7:%.*]] = load <8 x i8>, ptr [[ARRAYIDX_8]], align 1
 ; CHECK-NEXT:    [[TMP8:%.*]] = zext <8 x i8> [[TMP7]] to <8 x i16>
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl nuw <8 x i16> [[TMP8]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-; CHECK-NEXT:    store <8 x i16> [[TMP4]], <8 x i16>* [[TMP5]], align 2
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16* [[ARRAYIDX3_8]] to <8 x i16>*
-; CHECK-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* [[TMP10]], align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP4]], ptr [[B]], align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP9]], ptr [[ARRAYIDX3_8]], align 2
 ; CHECK-NEXT:    ret void
 ;
-  %arrayidx.1 = getelementptr inbounds i8, i8* %a, i64 1
-  %arrayidx.2 = getelementptr inbounds i8, i8* %a, i64 2
-  %arrayidx.3 = getelementptr inbounds i8, i8* %a, i64 3
-  %arrayidx.4 = getelementptr inbounds i8, i8* %a, i64 4
-  %arrayidx.5 = getelementptr inbounds i8, i8* %a, i64 5
-  %arrayidx.7 = getelementptr inbounds i8, i8* %a, i64 7
-  %arrayidx.6 = getelementptr inbounds i8, i8* %a, i64 6
-  %arrayidx.8 = getelementptr inbounds i8, i8* %a, i64 8
-  %arrayidx.9 = getelementptr inbounds i8, i8* %a, i64 9
-  %arrayidx.10 = getelementptr inbounds i8, i8* %a, i64 10
-  %arrayidx.11 = getelementptr inbounds i8, i8* %a, i64 11
-  %arrayidx.12 = getelementptr inbounds i8, i8* %a, i64 12
-  %arrayidx.13 = getelementptr inbounds i8, i8* %a, i64 13
-  %arrayidx.14 = getelementptr inbounds i8, i8* %a, i64 14
-  %arrayidx.15 = getelementptr inbounds i8, i8* %a, i64 15
-  %i = load i8, i8* %a, align 1
-  %i1 = load i8, i8* %arrayidx.1, align 1
-  %i2 = load i8, i8* %arrayidx.2, align 1
-  %i3 = load i8, i8* %arrayidx.3, align 1
-  %i4 = load i8, i8* %arrayidx.4, align 1
-  %i5 = load i8, i8* %arrayidx.5, align 1
-  %i6 = load i8, i8* %arrayidx.6, align 1
-  %i7 = load i8, i8* %arrayidx.7, align 1
-  %i8 = load i8, i8* %arrayidx.8, align 1
-  %i9 = load i8, i8* %arrayidx.9, align 1
-  %i10 = load i8, i8* %arrayidx.10, align 1
-  %i11 = load i8, i8* %arrayidx.11, align 1
-  %i12 = load i8, i8* %arrayidx.12, align 1
-  %i13 = load i8, i8* %arrayidx.13, align 1
-  %i14 = load i8, i8* %arrayidx.14, align 1
-  %i15 = load i8, i8* %arrayidx.15, align 1
+  %arrayidx.1 = getelementptr inbounds i8, ptr %a, i64 1
+  %arrayidx.2 = getelementptr inbounds i8, ptr %a, i64 2
+  %arrayidx.3 = getelementptr inbounds i8, ptr %a, i64 3
+  %arrayidx.4 = getelementptr inbounds i8, ptr %a, i64 4
+  %arrayidx.5 = getelementptr inbounds i8, ptr %a, i64 5
+  %arrayidx.7 = getelementptr inbounds i8, ptr %a, i64 7
+  %arrayidx.6 = getelementptr inbounds i8, ptr %a, i64 6
+  %arrayidx.8 = getelementptr inbounds i8, ptr %a, i64 8
+  %arrayidx.9 = getelementptr inbounds i8, ptr %a, i64 9
+  %arrayidx.10 = getelementptr inbounds i8, ptr %a, i64 10
+  %arrayidx.11 = getelementptr inbounds i8, ptr %a, i64 11
+  %arrayidx.12 = getelementptr inbounds i8, ptr %a, i64 12
+  %arrayidx.13 = getelementptr inbounds i8, ptr %a, i64 13
+  %arrayidx.14 = getelementptr inbounds i8, ptr %a, i64 14
+  %arrayidx.15 = getelementptr inbounds i8, ptr %a, i64 15
+  %i = load i8, ptr %a, align 1
+  %i1 = load i8, ptr %arrayidx.1, align 1
+  %i2 = load i8, ptr %arrayidx.2, align 1
+  %i3 = load i8, ptr %arrayidx.3, align 1
+  %i4 = load i8, ptr %arrayidx.4, align 1
+  %i5 = load i8, ptr %arrayidx.5, align 1
+  %i6 = load i8, ptr %arrayidx.6, align 1
+  %i7 = load i8, ptr %arrayidx.7, align 1
+  %i8 = load i8, ptr %arrayidx.8, align 1
+  %i9 = load i8, ptr %arrayidx.9, align 1
+  %i10 = load i8, ptr %arrayidx.10, align 1
+  %i11 = load i8, ptr %arrayidx.11, align 1
+  %i12 = load i8, ptr %arrayidx.12, align 1
+  %i13 = load i8, ptr %arrayidx.13, align 1
+  %i14 = load i8, ptr %arrayidx.14, align 1
+  %i15 = load i8, ptr %arrayidx.15, align 1
   %conv5 = zext i8 %i to i16
   %conv5.1 = zext i8 %i1 to i16
   %conv5.2 = zext i8 %i2 to i16
@@ -89,36 +85,36 @@ define void @PR50256(i8* %a, i16* %b, i32 %n) {
   %shl.13 = shl nuw i16 %conv5.13, 8
   %shl.14 = shl nuw i16 %conv5.14, 8
   %shl.15 = shl nuw i16 %conv5.15, 8
-  %arrayidx3.1 = getelementptr inbounds i16, i16* %b, i64 1
-  %arrayidx3.2 = getelementptr inbounds i16, i16* %b, i64 2
-  %arrayidx3.3 = getelementptr inbounds i16, i16* %b, i64 3
-  %arrayidx3.4 = getelementptr inbounds i16, i16* %b, i64 4
-  %arrayidx3.5 = getelementptr inbounds i16, i16* %b, i64 5
-  %arrayidx3.6 = getelementptr inbounds i16, i16* %b, i64 6
-  %arrayidx3.7 = getelementptr inbounds i16, i16* %b, i64 7
-  %arrayidx3.8 = getelementptr inbounds i16, i16* %b, i64 8
-  %arrayidx3.9 = getelementptr inbounds i16, i16* %b, i64 9
-  %arrayidx3.10 = getelementptr inbounds i16, i16* %b, i64 10
-  %arrayidx3.11 = getelementptr inbounds i16, i16* %b, i64 11
-  %arrayidx3.12 = getelementptr inbounds i16, i16* %b, i64 12
-  %arrayidx3.13 = getelementptr inbounds i16, i16* %b, i64 13
-  %arrayidx3.14 = getelementptr inbounds i16, i16* %b, i64 14
-  %arrayidx3.15 = getelementptr inbounds i16, i16* %b, i64 15
-  store i16 %shl, i16* %b, align 2
-  store i16 %shl.1, i16* %arrayidx3.1, align 2
-  store i16 %shl.2, i16* %arrayidx3.2, align 2
-  store i16 %shl.3, i16* %arrayidx3.3, align 2
-  store i16 %shl.4, i16* %arrayidx3.4, align 2
-  store i16 %shl.5, i16* %arrayidx3.5, align 2
-  store i16 %shl.6, i16* %arrayidx3.6, align 2
-  store i16 %shl.7, i16* %arrayidx3.7, align 2
-  store i16 %shl.8, i16* %arrayidx3.8, align 2
-  store i16 %shl.9, i16* %arrayidx3.9, align 2
-  store i16 %shl.10, i16* %arrayidx3.10, align 2
-  store i16 %shl.11, i16* %arrayidx3.11, align 2
-  store i16 %shl.12, i16* %arrayidx3.12, align 2
-  store i16 %shl.13, i16* %arrayidx3.13, align 2
-  store i16 %shl.14, i16* %arrayidx3.14, align 2
-  store i16 %shl.15, i16* %arrayidx3.15, align 2
+  %arrayidx3.1 = getelementptr inbounds i16, ptr %b, i64 1
+  %arrayidx3.2 = getelementptr inbounds i16, ptr %b, i64 2
+  %arrayidx3.3 = getelementptr inbounds i16, ptr %b, i64 3
+  %arrayidx3.4 = getelementptr inbounds i16, ptr %b, i64 4
+  %arrayidx3.5 = getelementptr inbounds i16, ptr %b, i64 5
+  %arrayidx3.6 = getelementptr inbounds i16, ptr %b, i64 6
+  %arrayidx3.7 = getelementptr inbounds i16, ptr %b, i64 7
+  %arrayidx3.8 = getelementptr inbounds i16, ptr %b, i64 8
+  %arrayidx3.9 = getelementptr inbounds i16, ptr %b, i64 9
+  %arrayidx3.10 = getelementptr inbounds i16, ptr %b, i64 10
+  %arrayidx3.11 = getelementptr inbounds i16, ptr %b, i64 11
+  %arrayidx3.12 = getelementptr inbounds i16, ptr %b, i64 12
+  %arrayidx3.13 = getelementptr inbounds i16, ptr %b, i64 13
+  %arrayidx3.14 = getelementptr inbounds i16, ptr %b, i64 14
+  %arrayidx3.15 = getelementptr inbounds i16, ptr %b, i64 15
+  store i16 %shl, ptr %b, align 2
+  store i16 %shl.1, ptr %arrayidx3.1, align 2
+  store i16 %shl.2, ptr %arrayidx3.2, align 2
+  store i16 %shl.3, ptr %arrayidx3.3, align 2
+  store i16 %shl.4, ptr %arrayidx3.4, align 2
+  store i16 %shl.5, ptr %arrayidx3.5, align 2
+  store i16 %shl.6, ptr %arrayidx3.6, align 2
+  store i16 %shl.7, ptr %arrayidx3.7, align 2
+  store i16 %shl.8, ptr %arrayidx3.8, align 2
+  store i16 %shl.9, ptr %arrayidx3.9, align 2
+  store i16 %shl.10, ptr %arrayidx3.10, align 2
+  store i16 %shl.11, ptr %arrayidx3.11, align 2
+  store i16 %shl.12, ptr %arrayidx3.12, align 2
+  store i16 %shl.13, ptr %arrayidx3.13, align 2
+  store i16 %shl.14, ptr %arrayidx3.14, align 2
+  store i16 %shl.15, ptr %arrayidx3.15, align 2
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/address-space-ptr-sze-gep-index-assert.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/address-space-ptr-sze-gep-index-assert.ll
index 2651fe6ff0122..63eaa8bdc2428 100644
--- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/address-space-ptr-sze-gep-index-assert.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/address-space-ptr-sze-gep-index-assert.ll
@@ -9,162 +9,162 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
 define void @slp_scev_assert(i32 %idx, i64 %tmp3) #0 {
 ; CHECK-LABEL: @slp_scev_assert(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP:%.*]] = addrspacecast i8 addrspace(5)* undef to i8*
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* undef, i32 [[IDX:%.*]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TMP]], i64 [[TMP3:%.*]]
-; CHECK-NEXT:    store i8 0, i8 addrspace(5)* [[TMP2]], align 1
-; CHECK-NEXT:    store i8 0, i8* [[TMP4]], align 1
+; CHECK-NEXT:    [[TMP:%.*]] = addrspacecast ptr addrspace(5) undef to ptr
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(5) undef, i32 [[IDX:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 [[TMP3:%.*]]
+; CHECK-NEXT:    store i8 0, ptr addrspace(5) [[TMP2]], align 1
+; CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
 ; CHECK-NEXT:    ret void
 ;
 bb:
-  %tmp = addrspacecast i8 addrspace(5)* undef to i8*
-  %tmp2 = getelementptr inbounds i8, i8 addrspace(5)* undef, i32 %idx
-  %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 %tmp3
-  store i8 0, i8 addrspace(5)* %tmp2
-  store i8 0, i8* %tmp4
+  %tmp = addrspacecast ptr addrspace(5) undef to ptr
+  %tmp2 = getelementptr inbounds i8, ptr addrspace(5) undef, i32 %idx
+  %tmp4 = getelementptr inbounds i8, ptr %tmp, i64 %tmp3
+  store i8 0, ptr addrspace(5) %tmp2
+  store i8 0, ptr %tmp4
   ret void
 }
 
-define void @multi_as_reduction_different_sized(i32 addrspace(3)* %lds, i32 %idx0, i64 %idx1) #0 {
+define void @multi_as_reduction_different_sized(ptr addrspace(3) %lds, i32 %idx0, i64 %idx1) #0 {
 ; CHECK-LABEL: @multi_as_reduction_different_sized(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast i32 addrspace(3)* [[LDS:%.*]] to i32*
+; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast ptr addrspace(3) [[LDS:%.*]] to ptr
 ; CHECK-NEXT:    [[ADD0:%.*]] = add i32 [[IDX0:%.*]], 2
 ; CHECK-NEXT:    [[ADD1:%.*]] = add i64 [[IDX1:%.*]], 1
-; CHECK-NEXT:    [[LDS_1:%.*]] = getelementptr inbounds i32, i32 addrspace(3)* [[LDS]], i32 [[ADD0]]
-; CHECK-NEXT:    [[FLAT_1:%.*]] = getelementptr inbounds i32, i32* [[FLAT]], i64 [[ADD1]]
-; CHECK-NEXT:    [[LOAD_LDS_0:%.*]] = load i32, i32 addrspace(3)* [[LDS]], align 4
-; CHECK-NEXT:    [[LOAD_LDS_1:%.*]] = load i32, i32 addrspace(3)* [[LDS_1]], align 4
-; CHECK-NEXT:    [[LOAD_FLAT_0:%.*]] = load i32, i32* [[FLAT]], align 4
-; CHECK-NEXT:    [[LOAD_FLAT_1:%.*]] = load i32, i32* [[FLAT_1]], align 4
+; CHECK-NEXT:    [[LDS_1:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[LDS]], i32 [[ADD0]]
+; CHECK-NEXT:    [[FLAT_1:%.*]] = getelementptr inbounds i32, ptr [[FLAT]], i64 [[ADD1]]
+; CHECK-NEXT:    [[LOAD_LDS_0:%.*]] = load i32, ptr addrspace(3) [[LDS]], align 4
+; CHECK-NEXT:    [[LOAD_LDS_1:%.*]] = load i32, ptr addrspace(3) [[LDS_1]], align 4
+; CHECK-NEXT:    [[LOAD_FLAT_0:%.*]] = load i32, ptr [[FLAT]], align 4
+; CHECK-NEXT:    [[LOAD_FLAT_1:%.*]] = load i32, ptr [[FLAT_1]], align 4
 ; CHECK-NEXT:    [[SUB0:%.*]] = sub i32 [[LOAD_FLAT_0]], [[LOAD_LDS_0]]
 ; CHECK-NEXT:    [[SUB1:%.*]] = sub i32 [[LOAD_FLAT_1]], [[LOAD_LDS_1]]
-; CHECK-NEXT:    store i32 [[SUB0]], i32* undef, align 4
-; CHECK-NEXT:    store i32 [[SUB1]], i32* undef, align 4
+; CHECK-NEXT:    store i32 [[SUB0]], ptr undef, align 4
+; CHECK-NEXT:    store i32 [[SUB1]], ptr undef, align 4
 ; CHECK-NEXT:    ret void
 ;
 bb:
-  %flat = addrspacecast i32 addrspace(3)* %lds to i32*
+  %flat = addrspacecast ptr addrspace(3) %lds to ptr
   %add0 = add i32 %idx0, 2
   %add1 = add i64 %idx1, 1
 
-  %lds.1 = getelementptr inbounds i32, i32 addrspace(3)* %lds, i32 %add0
-  %flat.1 = getelementptr inbounds i32, i32* %flat, i64 %add1
+  %lds.1 = getelementptr inbounds i32, ptr addrspace(3) %lds, i32 %add0
+  %flat.1 = getelementptr inbounds i32, ptr %flat, i64 %add1
 
-  %load.lds.0 = load i32, i32 addrspace(3)* %lds, align 4
-  %load.lds.1 = load i32, i32 addrspace(3)* %lds.1, align 4
+  %load.lds.0 = load i32, ptr addrspace(3) %lds, align 4
+  %load.lds.1 = load i32, ptr addrspace(3) %lds.1, align 4
 
-  %load.flat.0 = load i32, i32* %flat, align 4
-  %load.flat.1 = load i32, i32* %flat.1, align 4
+  %load.flat.0 = load i32, ptr %flat, align 4
+  %load.flat.1 = load i32, ptr %flat.1, align 4
 
   %sub0 = sub i32 %load.flat.0, %load.lds.0
   %sub1 = sub i32 %load.flat.1, %load.lds.1
 
-  store i32 %sub0, i32* undef
-  store i32 %sub1, i32* undef
+  store i32 %sub0, ptr undef
+  store i32 %sub1, ptr undef
   ret void
 }
 
 ; This should vectorize if using getUnderlyingObject
-define void @multi_as_reduction_same_size(i32 addrspace(1)* %global, i64 %idx0, i64 %idx1) #0 {
+define void @multi_as_reduction_same_size(ptr addrspace(1) %global, i64 %idx0, i64 %idx1) #0 {
 ; CHECK-LABEL: @multi_as_reduction_same_size(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast i32 addrspace(1)* [[GLOBAL:%.*]] to i32*
+; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast ptr addrspace(1) [[GLOBAL:%.*]] to ptr
 ; CHECK-NEXT:    [[ADD0:%.*]] = add i64 [[IDX0:%.*]], 2
 ; CHECK-NEXT:    [[ADD1:%.*]] = add i64 [[IDX1:%.*]], 1
-; CHECK-NEXT:    [[GLOBAL_1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[GLOBAL]], i64 [[ADD0]]
-; CHECK-NEXT:    [[FLAT_1:%.*]] = getelementptr inbounds i32, i32* [[FLAT]], i64 [[ADD1]]
-; CHECK-NEXT:    [[LOAD_GLOBAL_0:%.*]] = load i32, i32 addrspace(1)* [[GLOBAL]], align 4
-; CHECK-NEXT:    [[LOAD_GLOBAL_1:%.*]] = load i32, i32 addrspace(1)* [[GLOBAL_1]], align 4
-; CHECK-NEXT:    [[LOAD_FLAT_0:%.*]] = load i32, i32* [[FLAT]], align 4
-; CHECK-NEXT:    [[LOAD_FLAT_1:%.*]] = load i32, i32* [[FLAT_1]], align 4
+; CHECK-NEXT:    [[GLOBAL_1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[GLOBAL]], i64 [[ADD0]]
+; CHECK-NEXT:    [[FLAT_1:%.*]] = getelementptr inbounds i32, ptr [[FLAT]], i64 [[ADD1]]
+; CHECK-NEXT:    [[LOAD_GLOBAL_0:%.*]] = load i32, ptr addrspace(1) [[GLOBAL]], align 4
+; CHECK-NEXT:    [[LOAD_GLOBAL_1:%.*]] = load i32, ptr addrspace(1) [[GLOBAL_1]], align 4
+; CHECK-NEXT:    [[LOAD_FLAT_0:%.*]] = load i32, ptr [[FLAT]], align 4
+; CHECK-NEXT:    [[LOAD_FLAT_1:%.*]] = load i32, ptr [[FLAT_1]], align 4
 ; CHECK-NEXT:    [[SUB0:%.*]] = sub i32 [[LOAD_FLAT_0]], [[LOAD_GLOBAL_0]]
 ; CHECK-NEXT:    [[SUB1:%.*]] = sub i32 [[LOAD_FLAT_1]], [[LOAD_GLOBAL_1]]
-; CHECK-NEXT:    store i32 [[SUB0]], i32* undef, align 4
-; CHECK-NEXT:    store i32 [[SUB1]], i32* undef, align 4
+; CHECK-NEXT:    store i32 [[SUB0]], ptr undef, align 4
+; CHECK-NEXT:    store i32 [[SUB1]], ptr undef, align 4
 ; CHECK-NEXT:    ret void
 ;
 bb:
-  %flat = addrspacecast i32 addrspace(1)* %global to i32*
+  %flat = addrspacecast ptr addrspace(1) %global to ptr
   %add0 = add i64 %idx0, 2
   %add1 = add i64 %idx1, 1
 
-  %global.1 = getelementptr inbounds i32, i32 addrspace(1)* %global, i64 %add0
-  %flat.1 = getelementptr inbounds i32, i32* %flat, i64 %add1
+  %global.1 = getelementptr inbounds i32, ptr addrspace(1) %global, i64 %add0
+  %flat.1 = getelementptr inbounds i32, ptr %flat, i64 %add1
 
-  %load.global.0 = load i32, i32 addrspace(1)* %global, align 4
-  %load.global.1 = load i32, i32 addrspace(1)* %global.1, align 4
+  %load.global.0 = load i32, ptr addrspace(1) %global, align 4
+  %load.global.1 = load i32, ptr addrspace(1) %global.1, align 4
 
-  %load.flat.0 = load i32, i32* %flat, align 4
-  %load.flat.1 = load i32, i32* %flat.1, align 4
+  %load.flat.0 = load i32, ptr %flat, align 4
+  %load.flat.1 = load i32, ptr %flat.1, align 4
 
   %sub0 = sub i32 %load.flat.0, %load.global.0
   %sub1 = sub i32 %load.flat.1, %load.global.1
 
-  store i32 %sub0, i32* undef
-  store i32 %sub1, i32* undef
+  store i32 %sub0, ptr undef
+  store i32 %sub1, ptr undef
   ret void
 }
 
 ; This should vectorize if using getUnderlyingObject
 ; The add is done in the same width, even though the address space size is smaller
-define void @multi_as_reduction_different_sized_noncanon(i32 addrspace(3)* %lds, i64 %idx0, i64 %idx1) #0 {
+define void @multi_as_reduction_different_sized_noncanon(ptr addrspace(3) %lds, i64 %idx0, i64 %idx1) #0 {
 ; CHECK-LABEL: @multi_as_reduction_different_sized_noncanon(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast i32 addrspace(3)* [[LDS:%.*]] to i32*
+; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast ptr addrspace(3) [[LDS:%.*]] to ptr
 ; CHECK-NEXT:    [[ADD0:%.*]] = add i64 [[IDX0:%.*]], 2
 ; CHECK-NEXT:    [[ADD1:%.*]] = add i64 [[IDX1:%.*]], 1
-; CHECK-NEXT:    [[LDS_1:%.*]] = getelementptr inbounds i32, i32 addrspace(3)* [[LDS]], i64 [[ADD0]]
-; CHECK-NEXT:    [[FLAT_1:%.*]] = getelementptr inbounds i32, i32* [[FLAT]], i64 [[ADD1]]
-; CHECK-NEXT:    [[LOAD_LDS_0:%.*]] = load i32, i32 addrspace(3)* [[LDS]], align 4
-; CHECK-NEXT:    [[LOAD_LDS_1:%.*]] = load i32, i32 addrspace(3)* [[LDS_1]], align 4
-; CHECK-NEXT:    [[LOAD_FLAT_0:%.*]] = load i32, i32* [[FLAT]], align 4
-; CHECK-NEXT:    [[LOAD_FLAT_1:%.*]] = load i32, i32* [[FLAT_1]], align 4
+; CHECK-NEXT:    [[LDS_1:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[LDS]], i64 [[ADD0]]
+; CHECK-NEXT:    [[FLAT_1:%.*]] = getelementptr inbounds i32, ptr [[FLAT]], i64 [[ADD1]]
+; CHECK-NEXT:    [[LOAD_LDS_0:%.*]] = load i32, ptr addrspace(3) [[LDS]], align 4
+; CHECK-NEXT:    [[LOAD_LDS_1:%.*]] = load i32, ptr addrspace(3) [[LDS_1]], align 4
+; CHECK-NEXT:    [[LOAD_FLAT_0:%.*]] = load i32, ptr [[FLAT]], align 4
+; CHECK-NEXT:    [[LOAD_FLAT_1:%.*]] = load i32, ptr [[FLAT_1]], align 4
 ; CHECK-NEXT:    [[SUB0:%.*]] = sub i32 [[LOAD_FLAT_0]], [[LOAD_LDS_0]]
 ; CHECK-NEXT:    [[SUB1:%.*]] = sub i32 [[LOAD_FLAT_1]], [[LOAD_LDS_1]]
-; CHECK-NEXT:    store i32 [[SUB0]], i32* undef, align 4
-; CHECK-NEXT:    store i32 [[SUB1]], i32* undef, align 4
+; CHECK-NEXT:    store i32 [[SUB0]], ptr undef, align 4
+; CHECK-NEXT:    store i32 [[SUB1]], ptr undef, align 4
 ; CHECK-NEXT:    ret void
 ;
 bb:
-  %flat = addrspacecast i32 addrspace(3)* %lds to i32*
+  %flat = addrspacecast ptr addrspace(3) %lds to ptr
   %add0 = add i64 %idx0, 2
   %add1 = add i64 %idx1, 1
 
-  %lds.1 = getelementptr inbounds i32, i32 addrspace(3)* %lds, i64 %add0
-  %flat.1 = getelementptr inbounds i32, i32* %flat, i64 %add1
+  %lds.1 = getelementptr inbounds i32, ptr addrspace(3) %lds, i64 %add0
+  %flat.1 = getelementptr inbounds i32, ptr %flat, i64 %add1
 
-  %load.lds.0 = load i32, i32 addrspace(3)* %lds, align 4
-  %load.lds.1 = load i32, i32 addrspace(3)* %lds.1, align 4
+  %load.lds.0 = load i32, ptr addrspace(3) %lds, align 4
+  %load.lds.1 = load i32, ptr addrspace(3) %lds.1, align 4
 
-  %load.flat.0 = load i32, i32* %flat, align 4
-  %load.flat.1 = load i32, i32* %flat.1, align 4
+  %load.flat.0 = load i32, ptr %flat, align 4
+  %load.flat.1 = load i32, ptr %flat.1, align 4
 
   %sub0 = sub i32 %load.flat.0, %load.lds.0
   %sub1 = sub i32 %load.flat.1, %load.lds.1
 
-  store i32 %sub0, i32* undef
-  store i32 %sub1, i32* undef
+  store i32 %sub0, ptr undef
+  store i32 %sub1, ptr undef
   ret void
 }
 
 define void @slp_crash_on_addrspacecast() {
 ; CHECK-LABEL: @slp_crash_on_addrspacecast(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i64, i64 addrspace(3)* undef, i32 undef
-; CHECK-NEXT:    [[P0:%.*]] = addrspacecast i64 addrspace(3)* [[TMP0]] to i64*
-; CHECK-NEXT:    store i64 undef, i64* [[P0]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, i64 addrspace(3)* undef, i32 undef
-; CHECK-NEXT:    [[P1:%.*]] = addrspacecast i64 addrspace(3)* [[TMP1]] to i64*
-; CHECK-NEXT:    store i64 undef, i64* [[P1]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i64, ptr addrspace(3) undef, i32 undef
+; CHECK-NEXT:    [[P0:%.*]] = addrspacecast ptr addrspace(3) [[TMP0]] to ptr
+; CHECK-NEXT:    store i64 undef, ptr [[P0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr addrspace(3) undef, i32 undef
+; CHECK-NEXT:    [[P1:%.*]] = addrspacecast ptr addrspace(3) [[TMP1]] to ptr
+; CHECK-NEXT:    store i64 undef, ptr [[P1]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = getelementptr inbounds i64, i64 addrspace(3)* undef, i32 undef
-  %p0 = addrspacecast i64 addrspace(3)* %0 to i64*
-  store i64 undef, i64* %p0, align 8
-  %1 = getelementptr inbounds i64, i64 addrspace(3)* undef, i32 undef
-  %p1 = addrspacecast i64 addrspace(3)* %1 to i64*
-  store i64 undef, i64* %p1, align 8
+  %0 = getelementptr inbounds i64, ptr addrspace(3) undef, i32 undef
+  %p0 = addrspacecast ptr addrspace(3) %0 to ptr
+  store i64 undef, ptr %p0, align 8
+  %1 = getelementptr inbounds i64, ptr addrspace(3) undef, i32 undef
+  %p1 = addrspacecast ptr addrspace(3) %1 to ptr
+  store i64 undef, ptr %p1, align 8
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/horizontal-store.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/horizontal-store.ll
index a17939743659a..72fb03e60a297 100644
--- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/horizontal-store.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/horizontal-store.ll
@@ -15,83 +15,83 @@
 ; Tests whether the min/max reduction pattern is vectorized if SLP starts at the store.
 define i32 @smaxv6() {
 ; GFX9-LABEL: @smaxv6(
-; GFX9-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16
+; GFX9-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @arr, align 16
 ; GFX9-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
 ; GFX9-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
 ; GFX9-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
 ; GFX9-NEXT:    [[SELECT1:%.*]] = select i1 [[CMP1]], i32 [[TMP2]], i32 [[TMP3]]
-; GFX9-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
+; GFX9-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
 ; GFX9-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]])
 ; GFX9-NEXT:    [[OP_RDX:%.*]] = icmp sgt i32 [[TMP5]], [[SELECT1]]
 ; GFX9-NEXT:    [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP5]], i32 [[SELECT1]]
 ; GFX9-NEXT:    [[STORE_SELECT:%.*]] = select i1 [[CMP1]], i32 3, i32 4
-; GFX9-NEXT:    store i32 [[STORE_SELECT]], i32* @var, align 8
+; GFX9-NEXT:    store i32 [[STORE_SELECT]], ptr @var, align 8
 ; GFX9-NEXT:    ret i32 [[OP_RDX1]]
 ;
-  %load1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
-  %load2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
+  %load1 = load i32, ptr @arr, align 16
+  %load2 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
   %cmp1 = icmp sgt i32 %load1, %load2
   %select1 = select i1 %cmp1, i32 %load1, i32 %load2
 
-  %load3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
+  %load3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
   %cmp2 = icmp sgt i32 %select1, %load3
   %select2 = select i1 %cmp2, i32 %select1, i32 %load3
 
-  %load4 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
+  %load4 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
   %cmp3 = icmp sgt i32 %select2, %load4
   %select3 = select i1 %cmp3, i32 %select2, i32 %load4
 
-  %load5 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
+  %load5 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
   %cmp4 = icmp sgt i32 %select3, %load5
   %select4 = select i1 %cmp4, i32 %select3, i32 %load5
 
-  %load6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
+  %load6 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
   %cmp5 = icmp sgt i32 %select4, %load6
   %select5 = select i1 %cmp5, i32 %select4, i32 %load6
 
   %store-select = select i1 %cmp1, i32 3, i32 4
-  store i32 %store-select, i32* @var, align 8
+  store i32 %store-select, ptr @var, align 8
   ret i32 %select5
 }
 
 define i64 @sminv6() {
 ; GFX9-LABEL: @sminv6(
-; GFX9-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([32 x i64]* @arr64 to <2 x i64>*), align 16
+; GFX9-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @arr64, align 16
 ; GFX9-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
 ; GFX9-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
 ; GFX9-NEXT:    [[CMP1:%.*]] = icmp slt i64 [[TMP2]], [[TMP3]]
 ; GFX9-NEXT:    [[SELECT1:%.*]] = select i1 [[CMP1]], i64 [[TMP2]], i64 [[TMP3]]
-; GFX9-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([32 x i64], [32 x i64]* @arr64, i64 0, i64 2) to <4 x i64>*), align 16
+; GFX9-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([32 x i64], ptr @arr64, i64 0, i64 2), align 16
 ; GFX9-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP4]])
 ; GFX9-NEXT:    [[OP_RDX:%.*]] = icmp slt i64 [[TMP5]], [[SELECT1]]
 ; GFX9-NEXT:    [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i64 [[TMP5]], i64 [[SELECT1]]
 ; GFX9-NEXT:    [[STORE_SELECT:%.*]] = select i1 [[CMP1]], i64 3, i64 4
-; GFX9-NEXT:    store i64 [[STORE_SELECT]], i64* @var64, align 8
+; GFX9-NEXT:    store i64 [[STORE_SELECT]], ptr @var64, align 8
 ; GFX9-NEXT:    ret i64 [[OP_RDX1]]
 ;
-  %load1 = load i64, i64* getelementptr inbounds ([32 x i64], [32 x i64]* @arr64, i64 0, i64 0), align 16
-  %load2 = load i64, i64* getelementptr inbounds ([32 x i64], [32 x i64]* @arr64, i64 0, i64 1), align 8
+  %load1 = load i64, ptr @arr64, align 16
+  %load2 = load i64, ptr getelementptr inbounds ([32 x i64], ptr @arr64, i64 0, i64 1), align 8
   %cmp1 = icmp slt i64 %load1, %load2
   %select1 = select i1 %cmp1, i64 %load1, i64 %load2
 
-  %load3 = load i64, i64* getelementptr inbounds ([32 x i64], [32 x i64]* @arr64, i64 0, i64 2), align 16
+  %load3 = load i64, ptr getelementptr inbounds ([32 x i64], ptr @arr64, i64 0, i64 2), align 16
   %cmp2 = icmp slt i64 %select1, %load3
   %select2 = select i1 %cmp2, i64 %select1, i64 %load3
 
-  %load4 = load i64, i64* getelementptr inbounds ([32 x i64], [32 x i64]* @arr64, i64 0, i64 3), align 8
+  %load4 = load i64, ptr getelementptr inbounds ([32 x i64], ptr @arr64, i64 0, i64 3), align 8
   %cmp3 = icmp slt i64 %select2, %load4
   %select3 = select i1 %cmp3, i64 %select2, i64 %load4
 
-  %load5 = load i64, i64* getelementptr inbounds ([32 x i64], [32 x i64]* @arr64, i64 0, i64 4), align 16
+  %load5 = load i64, ptr getelementptr inbounds ([32 x i64], ptr @arr64, i64 0, i64 4), align 16
   %cmp4 = icmp slt i64 %select3, %load5
   %select4 = select i1 %cmp4, i64 %select3, i64 %load5
 
-  %load6 = load i64, i64* getelementptr inbounds ([32 x i64], [32 x i64]* @arr64, i64 0, i64 5), align 8
+  %load6 = load i64, ptr getelementptr inbounds ([32 x i64], ptr @arr64, i64 0, i64 5), align 8
   %cmp5 = icmp slt i64 %select4, %load6
   %select5 = select i1 %cmp5, i64 %select4, i64 %load6
 
   %store-select = select i1 %cmp1, i64 3, i64 4
-  store i64 %store-select, i64* @var64, align 8
+  store i64 %store-select, ptr @var64, align 8
   ret i64 %select5
 }
 
@@ -99,50 +99,50 @@ define i64 @sminv6() {
 ; with fastmath on the select.
 define float @fmaxv6() {
 ; GFX9-LABEL: @fmaxv6(
-; GFX9-NEXT:    [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([32 x float]* @farr to <2 x float>*), align 16
+; GFX9-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr @farr, align 16
 ; GFX9-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
 ; GFX9-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
 ; GFX9-NEXT:    [[CMP1:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]]
 ; GFX9-NEXT:    [[SELECT1:%.*]] = select i1 [[CMP1]], float [[TMP2]], float [[TMP3]]
-; GFX9-NEXT:    [[LOAD3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @farr, i64 0, i64 2), align 8
+; GFX9-NEXT:    [[LOAD3:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @farr, i64 0, i64 2), align 8
 ; GFX9-NEXT:    [[CMP2:%.*]] = fcmp fast ogt float [[SELECT1]], [[LOAD3]]
 ; GFX9-NEXT:    [[SELECT2:%.*]] = select i1 [[CMP2]], float [[SELECT1]], float [[LOAD3]]
-; GFX9-NEXT:    [[LOAD4:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @farr, i64 0, i64 3), align 4
+; GFX9-NEXT:    [[LOAD4:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @farr, i64 0, i64 3), align 4
 ; GFX9-NEXT:    [[CMP3:%.*]] = fcmp fast ogt float [[SELECT2]], [[LOAD4]]
 ; GFX9-NEXT:    [[SELECT3:%.*]] = select i1 [[CMP3]], float [[SELECT2]], float [[LOAD4]]
-; GFX9-NEXT:    [[LOAD5:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @farr, i64 0, i64 4), align 16
+; GFX9-NEXT:    [[LOAD5:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @farr, i64 0, i64 4), align 16
 ; GFX9-NEXT:    [[CMP4:%.*]] = fcmp fast ogt float [[SELECT3]], [[LOAD5]]
 ; GFX9-NEXT:    [[SELECT4:%.*]] = select i1 [[CMP4]], float [[SELECT3]], float [[LOAD5]]
-; GFX9-NEXT:    [[LOAD6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @farr, i64 0, i64 5), align 4
+; GFX9-NEXT:    [[LOAD6:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @farr, i64 0, i64 5), align 4
 ; GFX9-NEXT:    [[CMP5:%.*]] = fcmp fast ogt float [[SELECT4]], [[LOAD6]]
 ; GFX9-NEXT:    [[SELECT5:%.*]] = select i1 [[CMP5]], float [[SELECT4]], float [[LOAD6]]
 ; GFX9-NEXT:    [[STORE_SELECT:%.*]] = select i1 [[CMP1]], float 3.000000e+00, float 4.000000e+00
-; GFX9-NEXT:    store float [[STORE_SELECT]], float* @fvar, align 8
+; GFX9-NEXT:    store float [[STORE_SELECT]], ptr @fvar, align 8
 ; GFX9-NEXT:    ret float [[SELECT5]]
 ;
-  %load1 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @farr, i64 0, i64 0), align 16
-  %load2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @farr, i64 0, i64 1), align 4
+  %load1 = load float, ptr @farr, align 16
+  %load2 = load float, ptr getelementptr inbounds ([32 x float], ptr @farr, i64 0, i64 1), align 4
   %cmp1 = fcmp fast ogt float %load1, %load2
   %select1 = select i1 %cmp1, float %load1, float %load2
 
-  %load3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @farr, i64 0, i64 2), align 8
+  %load3 = load float, ptr getelementptr inbounds ([32 x float], ptr @farr, i64 0, i64 2), align 8
   %cmp2 = fcmp fast ogt float %select1, %load3
   %select2 = select i1 %cmp2, float %select1, float %load3
 
-  %load4 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @farr, i64 0, i64 3), align 4
+  %load4 = load float, ptr getelementptr inbounds ([32 x float], ptr @farr, i64 0, i64 3), align 4
   %cmp3 = fcmp fast ogt float %select2, %load4
   %select3 = select i1 %cmp3, float %select2, float %load4
 
-  %load5 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @farr, i64 0, i64 4), align 16
+  %load5 = load float, ptr getelementptr inbounds ([32 x float], ptr @farr, i64 0, i64 4), align 16
   %cmp4 = fcmp fast ogt float %select3, %load5
   %select4 = select i1 %cmp4, float %select3, float %load5
 
-  %load6 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @farr, i64 0, i64 5), align 4
+  %load6 = load float, ptr getelementptr inbounds ([32 x float], ptr @farr, i64 0, i64 5), align 4
   %cmp5 = fcmp fast ogt float %select4, %load6
   %select5 = select i1 %cmp5, float %select4, float %load6
 
   %store-select = select i1 %cmp1, float 3.0, float 4.0
-  store float %store-select, float* @fvar, align 8
+  store float %store-select, ptr @fvar, align 8
   ret float %select5
 }
 
@@ -150,91 +150,91 @@ define float @fmaxv6() {
 ; with fastmath on the select.
 define double @dminv6() {
 ; GFX9-LABEL: @dminv6(
-; GFX9-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([32 x double]* @darr to <2 x double>*), align 16
+; GFX9-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @darr, align 16
 ; GFX9-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
 ; GFX9-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
 ; GFX9-NEXT:    [[CMP1:%.*]] = fcmp fast olt double [[TMP2]], [[TMP3]]
 ; GFX9-NEXT:    [[SELECT1:%.*]] = select i1 [[CMP1]], double [[TMP2]], double [[TMP3]]
-; GFX9-NEXT:    [[LOAD3:%.*]] = load double, double* getelementptr inbounds ([32 x double], [32 x double]* @darr, i64 0, i64 2), align 8
+; GFX9-NEXT:    [[LOAD3:%.*]] = load double, ptr getelementptr inbounds ([32 x double], ptr @darr, i64 0, i64 2), align 8
 ; GFX9-NEXT:    [[CMP2:%.*]] = fcmp fast olt double [[SELECT1]], [[LOAD3]]
 ; GFX9-NEXT:    [[SELECT2:%.*]] = select i1 [[CMP2]], double [[SELECT1]], double [[LOAD3]]
-; GFX9-NEXT:    [[LOAD4:%.*]] = load double, double* getelementptr inbounds ([32 x double], [32 x double]* @darr, i64 0, i64 3), align 4
+; GFX9-NEXT:    [[LOAD4:%.*]] = load double, ptr getelementptr inbounds ([32 x double], ptr @darr, i64 0, i64 3), align 4
 ; GFX9-NEXT:    [[CMP3:%.*]] = fcmp fast olt double [[SELECT2]], [[LOAD4]]
 ; GFX9-NEXT:    [[SELECT3:%.*]] = select i1 [[CMP3]], double [[SELECT2]], double [[LOAD4]]
-; GFX9-NEXT:    [[LOAD5:%.*]] = load double, double* getelementptr inbounds ([32 x double], [32 x double]* @darr, i64 0, i64 4), align 16
+; GFX9-NEXT:    [[LOAD5:%.*]] = load double, ptr getelementptr inbounds ([32 x double], ptr @darr, i64 0, i64 4), align 16
 ; GFX9-NEXT:    [[CMP4:%.*]] = fcmp fast olt double [[SELECT3]], [[LOAD5]]
 ; GFX9-NEXT:    [[SELECT4:%.*]] = select i1 [[CMP4]], double [[SELECT3]], double [[LOAD5]]
-; GFX9-NEXT:    [[LOAD6:%.*]] = load double, double* getelementptr inbounds ([32 x double], [32 x double]* @darr, i64 0, i64 5), align 4
+; GFX9-NEXT:    [[LOAD6:%.*]] = load double, ptr getelementptr inbounds ([32 x double], ptr @darr, i64 0, i64 5), align 4
 ; GFX9-NEXT:    [[CMP5:%.*]] = fcmp fast olt double [[SELECT4]], [[LOAD6]]
 ; GFX9-NEXT:    [[SELECT5:%.*]] = select i1 [[CMP5]], double [[SELECT4]], double [[LOAD6]]
 ; GFX9-NEXT:    [[STORE_SELECT:%.*]] = select i1 [[CMP1]], double 3.000000e+00, double 4.000000e+00
-; GFX9-NEXT:    store double [[STORE_SELECT]], double* @dvar, align 8
+; GFX9-NEXT:    store double [[STORE_SELECT]], ptr @dvar, align 8
 ; GFX9-NEXT:    ret double [[SELECT5]]
 ;
-  %load1 = load double, double* getelementptr inbounds ([32 x double], [32 x double]* @darr, i64 0, i64 0), align 16
-  %load2 = load double, double* getelementptr inbounds ([32 x double], [32 x double]* @darr, i64 0, i64 1), align 4
+  %load1 = load double, ptr @darr, align 16
+  %load2 = load double, ptr getelementptr inbounds ([32 x double], ptr @darr, i64 0, i64 1), align 4
   %cmp1 = fcmp fast olt double %load1, %load2
   %select1 = select i1 %cmp1, double %load1, double %load2
 
-  %load3 = load double, double* getelementptr inbounds ([32 x double], [32 x double]* @darr, i64 0, i64 2), align 8
+  %load3 = load double, ptr getelementptr inbounds ([32 x double], ptr @darr, i64 0, i64 2), align 8
   %cmp2 = fcmp fast olt double %select1, %load3
   %select2 = select i1 %cmp2, double %select1, double %load3
 
-  %load4 = load double, double* getelementptr inbounds ([32 x double], [32 x double]* @darr, i64 0, i64 3), align 4
+  %load4 = load double, ptr getelementptr inbounds ([32 x double], ptr @darr, i64 0, i64 3), align 4
   %cmp3 = fcmp fast olt double %select2, %load4
   %select3 = select i1 %cmp3, double %select2, double %load4
 
-  %load5 = load double, double* getelementptr inbounds ([32 x double], [32 x double]* @darr, i64 0, i64 4), align 16
+  %load5 = load double, ptr getelementptr inbounds ([32 x double], ptr @darr, i64 0, i64 4), align 16
   %cmp4 = fcmp fast olt double %select3, %load5
   %select4 = select i1 %cmp4, double %select3, double %load5
 
-  %load6 = load double, double* getelementptr inbounds ([32 x double], [32 x double]* @darr, i64 0, i64 5), align 4
+  %load6 = load double, ptr getelementptr inbounds ([32 x double], ptr @darr, i64 0, i64 5), align 4
   %cmp5 = fcmp fast olt double %select4, %load6
   %select5 = select i1 %cmp5, double %select4, double %load6
 
   %store-select = select i1 %cmp1, double 3.0, double 4.0
-  store double %store-select, double* @dvar, align 8
+  store double %store-select, ptr @dvar, align 8
   ret double %select5
 }
 
 define i32 @smax_wdiff_valuenum(i32, i32 %v1) {
 ; GFX9-LABEL: @smax_wdiff_valuenum(
-; GFX9-NEXT:    [[VLOAD:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16
+; GFX9-NEXT:    [[VLOAD:%.*]] = load <2 x i32>, ptr @arr, align 16
 ; GFX9-NEXT:    [[ELT1:%.*]] = extractelement <2 x i32> [[VLOAD]], i32 0
 ; GFX9-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[ELT1]], [[V1:%.*]]
 ; GFX9-NEXT:    [[EX0:%.*]] = extractelement <2 x i32> [[VLOAD]], i32 0
 ; GFX9-NEXT:    [[SELECT1:%.*]] = select i1 [[CMP1]], i32 [[EX0]], i32 [[V1]]
-; GFX9-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
+; GFX9-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
 ; GFX9-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]])
 ; GFX9-NEXT:    [[OP_RDX:%.*]] = icmp sgt i32 [[TMP3]], [[SELECT1]]
 ; GFX9-NEXT:    [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP3]], i32 [[SELECT1]]
 ; GFX9-NEXT:    [[STOREVAL:%.*]] = select i1 [[CMP1]], i32 3, i32 4
-; GFX9-NEXT:    store i32 [[STOREVAL]], i32* @var, align 8
+; GFX9-NEXT:    store i32 [[STOREVAL]], ptr @var, align 8
 ; GFX9-NEXT:    ret i32 [[OP_RDX1]]
 ;
-  %vload = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16
+  %vload = load <2 x i32>, ptr @arr, align 16
   %elt1 = extractelement <2 x i32> %vload, i32 0
   %cmp1 = icmp sgt i32 %elt1, %v1
   %ex0 = extractelement <2 x i32> %vload, i32 0
   %select1 = select i1 %cmp1, i32 %ex0, i32 %v1
 
-  %load3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
+  %load3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
   %cmp2 = icmp sgt i32 %select1, %load3
   %select2 = select i1 %cmp2, i32 %select1, i32 %load3
 
-  %load4 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
+  %load4 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
   %cmp3 = icmp sgt i32 %select2, %load4
   %select3 = select i1 %cmp3, i32 %select2, i32 %load4
 
-  %load5 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
+  %load5 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
   %cmp4 = icmp sgt i32 %select3, %load5
   %select4 = select i1 %cmp4, i32 %select3, i32 %load5
 
-  %load6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
+  %load6 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
   %cmp5 = icmp sgt i32 %select4, %load6
   %select5 = select i1 %cmp5, i32 %select4, i32 %load6
 
   %storeval = select i1 %cmp1, i32 3, i32 4
-  store i32 %storeval, i32* @var, align 8
+  store i32 %storeval, ptr @var, align 8
   ret i32 %select5
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/packed-math.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/packed-math.ll
index 3996ffe314fe9..7152115220b00 100644
--- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/packed-math.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/packed-math.ll
@@ -5,252 +5,226 @@
 ; FIXME: Should still like to vectorize the memory operations for VI
 
 ; Simple 3-pair chain with loads and stores
-define amdgpu_kernel void @test1_as_3_3_3_v2f16(half addrspace(3)* %a, half addrspace(3)* %b, half addrspace(3)* %c) {
+define amdgpu_kernel void @test1_as_3_3_3_v2f16(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c) {
 ; GCN-LABEL: @test1_as_3_3_3_v2f16(
-; GCN-NEXT:    [[TMP1:%.*]] = bitcast half addrspace(3)* [[A:%.*]] to <2 x half> addrspace(3)*
-; GCN-NEXT:    [[TMP2:%.*]] = load <2 x half>, <2 x half> addrspace(3)* [[TMP1]], align 2
-; GCN-NEXT:    [[TMP3:%.*]] = bitcast half addrspace(3)* [[B:%.*]] to <2 x half> addrspace(3)*
-; GCN-NEXT:    [[TMP4:%.*]] = load <2 x half>, <2 x half> addrspace(3)* [[TMP3]], align 2
+; GCN-NEXT:    [[TMP2:%.*]] = load <2 x half>, ptr addrspace(3) [[A:%.*]], align 2
+; GCN-NEXT:    [[TMP4:%.*]] = load <2 x half>, ptr addrspace(3) [[B:%.*]], align 2
 ; GCN-NEXT:    [[TMP5:%.*]] = fmul <2 x half> [[TMP2]], [[TMP4]]
-; GCN-NEXT:    [[TMP6:%.*]] = bitcast half addrspace(3)* [[C:%.*]] to <2 x half> addrspace(3)*
-; GCN-NEXT:    store <2 x half> [[TMP5]], <2 x half> addrspace(3)* [[TMP6]], align 2
+; GCN-NEXT:    store <2 x half> [[TMP5]], ptr addrspace(3) [[C:%.*]], align 2
 ; GCN-NEXT:    ret void
 ;
-  %i0 = load half, half addrspace(3)* %a, align 2
-  %i1 = load half, half addrspace(3)* %b, align 2
+  %i0 = load half, ptr addrspace(3) %a, align 2
+  %i1 = load half, ptr addrspace(3) %b, align 2
   %mul = fmul half %i0, %i1
-  %arrayidx3 = getelementptr inbounds half, half addrspace(3)* %a, i64 1
-  %i3 = load half, half addrspace(3)* %arrayidx3, align 2
-  %arrayidx4 = getelementptr inbounds half, half addrspace(3)* %b, i64 1
-  %i4 = load half, half addrspace(3)* %arrayidx4, align 2
+  %arrayidx3 = getelementptr inbounds half, ptr addrspace(3) %a, i64 1
+  %i3 = load half, ptr addrspace(3) %arrayidx3, align 2
+  %arrayidx4 = getelementptr inbounds half, ptr addrspace(3) %b, i64 1
+  %i4 = load half, ptr addrspace(3) %arrayidx4, align 2
   %mul5 = fmul half %i3, %i4
-  store half %mul, half addrspace(3)* %c, align 2
-  %arrayidx5 = getelementptr inbounds half, half addrspace(3)* %c, i64 1
-  store half %mul5, half addrspace(3)* %arrayidx5, align 2
+  store half %mul, ptr addrspace(3) %c, align 2
+  %arrayidx5 = getelementptr inbounds half, ptr addrspace(3) %c, i64 1
+  store half %mul5, ptr addrspace(3) %arrayidx5, align 2
   ret void
 }
 
-define amdgpu_kernel void @test1_as_3_0_0(half addrspace(3)* %a, half* %b, half* %c) {
+define amdgpu_kernel void @test1_as_3_0_0(ptr addrspace(3) %a, ptr %b, ptr %c) {
 ; GCN-LABEL: @test1_as_3_0_0(
-; GCN-NEXT:    [[TMP1:%.*]] = bitcast half addrspace(3)* [[A:%.*]] to <2 x half> addrspace(3)*
-; GCN-NEXT:    [[TMP2:%.*]] = load <2 x half>, <2 x half> addrspace(3)* [[TMP1]], align 2
-; GCN-NEXT:    [[TMP3:%.*]] = bitcast half* [[B:%.*]] to <2 x half>*
-; GCN-NEXT:    [[TMP4:%.*]] = load <2 x half>, <2 x half>* [[TMP3]], align 2
+; GCN-NEXT:    [[TMP2:%.*]] = load <2 x half>, ptr addrspace(3) [[A:%.*]], align 2
+; GCN-NEXT:    [[TMP4:%.*]] = load <2 x half>, ptr [[B:%.*]], align 2
 ; GCN-NEXT:    [[TMP5:%.*]] = fmul <2 x half> [[TMP2]], [[TMP4]]
-; GCN-NEXT:    [[TMP6:%.*]] = bitcast half* [[C:%.*]] to <2 x half>*
-; GCN-NEXT:    store <2 x half> [[TMP5]], <2 x half>* [[TMP6]], align 2
+; GCN-NEXT:    store <2 x half> [[TMP5]], ptr [[C:%.*]], align 2
 ; GCN-NEXT:    ret void
 ;
-  %i0 = load half, half addrspace(3)* %a, align 2
-  %i1 = load half, half* %b, align 2
+  %i0 = load half, ptr addrspace(3) %a, align 2
+  %i1 = load half, ptr %b, align 2
   %mul = fmul half %i0, %i1
-  %arrayidx3 = getelementptr inbounds half, half addrspace(3)* %a, i64 1
-  %i3 = load half, half addrspace(3)* %arrayidx3, align 2
-  %arrayidx4 = getelementptr inbounds half, half* %b, i64 1
-  %i4 = load half, half* %arrayidx4, align 2
+  %arrayidx3 = getelementptr inbounds half, ptr addrspace(3) %a, i64 1
+  %i3 = load half, ptr addrspace(3) %arrayidx3, align 2
+  %arrayidx4 = getelementptr inbounds half, ptr %b, i64 1
+  %i4 = load half, ptr %arrayidx4, align 2
   %mul5 = fmul half %i3, %i4
-  store half %mul, half* %c, align 2
-  %arrayidx5 = getelementptr inbounds half, half* %c, i64 1
-  store half %mul5, half* %arrayidx5, align 2
+  store half %mul, ptr %c, align 2
+  %arrayidx5 = getelementptr inbounds half, ptr %c, i64 1
+  store half %mul5, ptr %arrayidx5, align 2
   ret void
 }
 
-define amdgpu_kernel void @test1_as_0_0_3_v2f16(half* %a, half* %b, half addrspace(3)* %c) {
+define amdgpu_kernel void @test1_as_0_0_3_v2f16(ptr %a, ptr %b, ptr addrspace(3) %c) {
 ; GCN-LABEL: @test1_as_0_0_3_v2f16(
-; GCN-NEXT:    [[TMP1:%.*]] = bitcast half* [[A:%.*]] to <2 x half>*
-; GCN-NEXT:    [[TMP2:%.*]] = load <2 x half>, <2 x half>* [[TMP1]], align 2
-; GCN-NEXT:    [[TMP3:%.*]] = bitcast half* [[B:%.*]] to <2 x half>*
-; GCN-NEXT:    [[TMP4:%.*]] = load <2 x half>, <2 x half>* [[TMP3]], align 2
+; GCN-NEXT:    [[TMP2:%.*]] = load <2 x half>, ptr [[A:%.*]], align 2
+; GCN-NEXT:    [[TMP4:%.*]] = load <2 x half>, ptr [[B:%.*]], align 2
 ; GCN-NEXT:    [[TMP5:%.*]] = fmul <2 x half> [[TMP2]], [[TMP4]]
-; GCN-NEXT:    [[TMP6:%.*]] = bitcast half addrspace(3)* [[C:%.*]] to <2 x half> addrspace(3)*
-; GCN-NEXT:    store <2 x half> [[TMP5]], <2 x half> addrspace(3)* [[TMP6]], align 2
+; GCN-NEXT:    store <2 x half> [[TMP5]], ptr addrspace(3) [[C:%.*]], align 2
 ; GCN-NEXT:    ret void
 ;
-  %i0 = load half, half* %a, align 2
-  %i1 = load half, half* %b, align 2
+  %i0 = load half, ptr %a, align 2
+  %i1 = load half, ptr %b, align 2
   %mul = fmul half %i0, %i1
-  %arrayidx3 = getelementptr inbounds half, half* %a, i64 1
-  %i3 = load half, half* %arrayidx3, align 2
-  %arrayidx4 = getelementptr inbounds half, half* %b, i64 1
-  %i4 = load half, half* %arrayidx4, align 2
+  %arrayidx3 = getelementptr inbounds half, ptr %a, i64 1
+  %i3 = load half, ptr %arrayidx3, align 2
+  %arrayidx4 = getelementptr inbounds half, ptr %b, i64 1
+  %i4 = load half, ptr %arrayidx4, align 2
   %mul5 = fmul half %i3, %i4
-  store half %mul, half addrspace(3)* %c, align 2
-  %arrayidx5 = getelementptr inbounds half, half addrspace(3)* %c, i64 1
-  store half %mul5, half addrspace(3)* %arrayidx5, align 2
+  store half %mul, ptr addrspace(3) %c, align 2
+  %arrayidx5 = getelementptr inbounds half, ptr addrspace(3) %c, i64 1
+  store half %mul5, ptr addrspace(3) %arrayidx5, align 2
   ret void
 }
 
-define amdgpu_kernel void @test1_fma_v2f16(half addrspace(3)* %a, half addrspace(3)* %b, half addrspace(3)* %c, half addrspace(3)* %d) {
+define amdgpu_kernel void @test1_fma_v2f16(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d) {
 ; GCN-LABEL: @test1_fma_v2f16(
-; GCN-NEXT:    [[TMP1:%.*]] = bitcast half addrspace(3)* [[A:%.*]] to <2 x half> addrspace(3)*
-; GCN-NEXT:    [[TMP2:%.*]] = load <2 x half>, <2 x half> addrspace(3)* [[TMP1]], align 2
-; GCN-NEXT:    [[TMP3:%.*]] = bitcast half addrspace(3)* [[B:%.*]] to <2 x half> addrspace(3)*
-; GCN-NEXT:    [[TMP4:%.*]] = load <2 x half>, <2 x half> addrspace(3)* [[TMP3]], align 2
-; GCN-NEXT:    [[TMP5:%.*]] = bitcast half addrspace(3)* [[C:%.*]] to <2 x half> addrspace(3)*
-; GCN-NEXT:    [[TMP6:%.*]] = load <2 x half>, <2 x half> addrspace(3)* [[TMP5]], align 2
+; GCN-NEXT:    [[TMP2:%.*]] = load <2 x half>, ptr addrspace(3) [[A:%.*]], align 2
+; GCN-NEXT:    [[TMP4:%.*]] = load <2 x half>, ptr addrspace(3) [[B:%.*]], align 2
+; GCN-NEXT:    [[TMP6:%.*]] = load <2 x half>, ptr addrspace(3) [[C:%.*]], align 2
 ; GCN-NEXT:    [[TMP7:%.*]] = call <2 x half> @llvm.fma.v2f16(<2 x half> [[TMP2]], <2 x half> [[TMP4]], <2 x half> [[TMP6]])
-; GCN-NEXT:    [[TMP8:%.*]] = bitcast half addrspace(3)* [[D:%.*]] to <2 x half> addrspace(3)*
-; GCN-NEXT:    store <2 x half> [[TMP7]], <2 x half> addrspace(3)* [[TMP8]], align 2
+; GCN-NEXT:    store <2 x half> [[TMP7]], ptr addrspace(3) [[D:%.*]], align 2
 ; GCN-NEXT:    ret void
 ;
-  %i0 = load half, half addrspace(3)* %a, align 2
-  %i1 = load half, half addrspace(3)* %b, align 2
-  %i2 = load half, half addrspace(3)* %c, align 2
+  %i0 = load half, ptr addrspace(3) %a, align 2
+  %i1 = load half, ptr addrspace(3) %b, align 2
+  %i2 = load half, ptr addrspace(3) %c, align 2
   %fma0 = call half @llvm.fma.f16(half %i0, half %i1, half %i2)
-  %arrayidx3 = getelementptr inbounds half, half addrspace(3)* %a, i64 1
-  %i3 = load half, half addrspace(3)* %arrayidx3, align 2
-  %arrayidx4 = getelementptr inbounds half, half addrspace(3)* %b, i64 1
-  %i4 = load half, half addrspace(3)* %arrayidx4, align 2
-  %arrayidx5 = getelementptr inbounds half, half addrspace(3)* %c, i64 1
-  %i5 = load half, half addrspace(3)* %arrayidx5, align 2
+  %arrayidx3 = getelementptr inbounds half, ptr addrspace(3) %a, i64 1
+  %i3 = load half, ptr addrspace(3) %arrayidx3, align 2
+  %arrayidx4 = getelementptr inbounds half, ptr addrspace(3) %b, i64 1
+  %i4 = load half, ptr addrspace(3) %arrayidx4, align 2
+  %arrayidx5 = getelementptr inbounds half, ptr addrspace(3) %c, i64 1
+  %i5 = load half, ptr addrspace(3) %arrayidx5, align 2
   %fma1 = call half @llvm.fma.f16(half %i3, half %i4, half %i5)
-  store half %fma0, half addrspace(3)* %d, align 2
-  %arrayidx6 = getelementptr inbounds half, half addrspace(3)* %d, i64 1
-  store half %fma1, half addrspace(3)* %arrayidx6, align 2
+  store half %fma0, ptr addrspace(3) %d, align 2
+  %arrayidx6 = getelementptr inbounds half, ptr addrspace(3) %d, i64 1
+  store half %fma1, ptr addrspace(3) %arrayidx6, align 2
   ret void
 }
 
-define amdgpu_kernel void @mul_scalar_v2f16(half addrspace(3)* %a, half %scalar, half addrspace(3)* %c) {
+define amdgpu_kernel void @mul_scalar_v2f16(ptr addrspace(3) %a, half %scalar, ptr addrspace(3) %c) {
 ; GCN-LABEL: @mul_scalar_v2f16(
-; GCN-NEXT:    [[TMP1:%.*]] = bitcast half addrspace(3)* [[A:%.*]] to <2 x half> addrspace(3)*
-; GCN-NEXT:    [[TMP2:%.*]] = load <2 x half>, <2 x half> addrspace(3)* [[TMP1]], align 2
+; GCN-NEXT:    [[TMP2:%.*]] = load <2 x half>, ptr addrspace(3) [[A:%.*]], align 2
 ; GCN-NEXT:    [[TMP3:%.*]] = insertelement <2 x half> poison, half [[SCALAR:%.*]], i32 0
 ; GCN-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x half> [[TMP3]], <2 x half> poison, <2 x i32> zeroinitializer
 ; GCN-NEXT:    [[TMP4:%.*]] = fmul <2 x half> [[TMP2]], [[SHUFFLE]]
-; GCN-NEXT:    [[TMP5:%.*]] = bitcast half addrspace(3)* [[C:%.*]] to <2 x half> addrspace(3)*
-; GCN-NEXT:    store <2 x half> [[TMP4]], <2 x half> addrspace(3)* [[TMP5]], align 2
+; GCN-NEXT:    store <2 x half> [[TMP4]], ptr addrspace(3) [[C:%.*]], align 2
 ; GCN-NEXT:    ret void
 ;
-  %i0 = load half, half addrspace(3)* %a, align 2
+  %i0 = load half, ptr addrspace(3) %a, align 2
   %mul = fmul half %i0, %scalar
-  %arrayidx3 = getelementptr inbounds half, half addrspace(3)* %a, i64 1
-  %i3 = load half, half addrspace(3)* %arrayidx3, align 2
+  %arrayidx3 = getelementptr inbounds half, ptr addrspace(3) %a, i64 1
+  %i3 = load half, ptr addrspace(3) %arrayidx3, align 2
   %mul5 = fmul half %i3, %scalar
-  store half %mul, half addrspace(3)* %c, align 2
-  %arrayidx5 = getelementptr inbounds half, half addrspace(3)* %c, i64 1
-  store half %mul5, half addrspace(3)* %arrayidx5, align 2
+  store half %mul, ptr addrspace(3) %c, align 2
+  %arrayidx5 = getelementptr inbounds half, ptr addrspace(3) %c, i64 1
+  store half %mul5, ptr addrspace(3) %arrayidx5, align 2
   ret void
 }
 
-define amdgpu_kernel void @fabs_v2f16(half addrspace(3)* %a, half addrspace(3)* %c) {
+define amdgpu_kernel void @fabs_v2f16(ptr addrspace(3) %a, ptr addrspace(3) %c) {
 ; GCN-LABEL: @fabs_v2f16(
-; GCN-NEXT:    [[TMP1:%.*]] = bitcast half addrspace(3)* [[A:%.*]] to <2 x half> addrspace(3)*
-; GCN-NEXT:    [[TMP2:%.*]] = load <2 x half>, <2 x half> addrspace(3)* [[TMP1]], align 2
+; GCN-NEXT:    [[TMP2:%.*]] = load <2 x half>, ptr addrspace(3) [[A:%.*]], align 2
 ; GCN-NEXT:    [[TMP3:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[TMP2]])
-; GCN-NEXT:    [[TMP4:%.*]] = bitcast half addrspace(3)* [[C:%.*]] to <2 x half> addrspace(3)*
-; GCN-NEXT:    store <2 x half> [[TMP3]], <2 x half> addrspace(3)* [[TMP4]], align 2
+; GCN-NEXT:    store <2 x half> [[TMP3]], ptr addrspace(3) [[C:%.*]], align 2
 ; GCN-NEXT:    ret void
 ;
-  %i0 = load half, half addrspace(3)* %a, align 2
+  %i0 = load half, ptr addrspace(3) %a, align 2
   %fabs0 = call half @llvm.fabs.f16(half %i0)
-  %arrayidx3 = getelementptr inbounds half, half addrspace(3)* %a, i64 1
-  %i3 = load half, half addrspace(3)* %arrayidx3, align 2
+  %arrayidx3 = getelementptr inbounds half, ptr addrspace(3) %a, i64 1
+  %i3 = load half, ptr addrspace(3) %arrayidx3, align 2
   %fabs1 = call half @llvm.fabs.f16(half %i3)
-  store half %fabs0, half addrspace(3)* %c, align 2
-  %arrayidx5 = getelementptr inbounds half, half addrspace(3)* %c, i64 1
-  store half %fabs1, half addrspace(3)* %arrayidx5, align 2
+  store half %fabs0, ptr addrspace(3) %c, align 2
+  %arrayidx5 = getelementptr inbounds half, ptr addrspace(3) %c, i64 1
+  store half %fabs1, ptr addrspace(3) %arrayidx5, align 2
   ret void
 }
 
-define amdgpu_kernel void @test1_fabs_fma_v2f16(half addrspace(3)* %a, half addrspace(3)* %b, half addrspace(3)* %c, half addrspace(3)* %d) {
+define amdgpu_kernel void @test1_fabs_fma_v2f16(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d) {
 ; GCN-LABEL: @test1_fabs_fma_v2f16(
-; GCN-NEXT:    [[TMP1:%.*]] = bitcast half addrspace(3)* [[A:%.*]] to <2 x half> addrspace(3)*
-; GCN-NEXT:    [[TMP2:%.*]] = load <2 x half>, <2 x half> addrspace(3)* [[TMP1]], align 2
-; GCN-NEXT:    [[TMP3:%.*]] = bitcast half addrspace(3)* [[B:%.*]] to <2 x half> addrspace(3)*
-; GCN-NEXT:    [[TMP4:%.*]] = load <2 x half>, <2 x half> addrspace(3)* [[TMP3]], align 2
-; GCN-NEXT:    [[TMP5:%.*]] = bitcast half addrspace(3)* [[C:%.*]] to <2 x half> addrspace(3)*
-; GCN-NEXT:    [[TMP6:%.*]] = load <2 x half>, <2 x half> addrspace(3)* [[TMP5]], align 2
+; GCN-NEXT:    [[TMP2:%.*]] = load <2 x half>, ptr addrspace(3) [[A:%.*]], align 2
+; GCN-NEXT:    [[TMP4:%.*]] = load <2 x half>, ptr addrspace(3) [[B:%.*]], align 2
+; GCN-NEXT:    [[TMP6:%.*]] = load <2 x half>, ptr addrspace(3) [[C:%.*]], align 2
 ; GCN-NEXT:    [[TMP7:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[TMP2]])
 ; GCN-NEXT:    [[TMP8:%.*]] = call <2 x half> @llvm.fma.v2f16(<2 x half> [[TMP7]], <2 x half> [[TMP4]], <2 x half> [[TMP6]])
-; GCN-NEXT:    [[TMP9:%.*]] = bitcast half addrspace(3)* [[D:%.*]] to <2 x half> addrspace(3)*
-; GCN-NEXT:    store <2 x half> [[TMP8]], <2 x half> addrspace(3)* [[TMP9]], align 2
+; GCN-NEXT:    store <2 x half> [[TMP8]], ptr addrspace(3) [[D:%.*]], align 2
 ; GCN-NEXT:    ret void
 ;
-  %i0 = load half, half addrspace(3)* %a, align 2
-  %i1 = load half, half addrspace(3)* %b, align 2
-  %i2 = load half, half addrspace(3)* %c, align 2
+  %i0 = load half, ptr addrspace(3) %a, align 2
+  %i1 = load half, ptr addrspace(3) %b, align 2
+  %i2 = load half, ptr addrspace(3) %c, align 2
   %i0.fabs = call half @llvm.fabs.f16(half %i0)
 
   %fma0 = call half @llvm.fma.f16(half %i0.fabs, half %i1, half %i2)
-  %arrayidx3 = getelementptr inbounds half, half addrspace(3)* %a, i64 1
-  %i3 = load half, half addrspace(3)* %arrayidx3, align 2
-  %arrayidx4 = getelementptr inbounds half, half addrspace(3)* %b, i64 1
-  %i4 = load half, half addrspace(3)* %arrayidx4, align 2
-  %arrayidx5 = getelementptr inbounds half, half addrspace(3)* %c, i64 1
-  %i5 = load half, half addrspace(3)* %arrayidx5, align 2
+  %arrayidx3 = getelementptr inbounds half, ptr addrspace(3) %a, i64 1
+  %i3 = load half, ptr addrspace(3) %arrayidx3, align 2
+  %arrayidx4 = getelementptr inbounds half, ptr addrspace(3) %b, i64 1
+  %i4 = load half, ptr addrspace(3) %arrayidx4, align 2
+  %arrayidx5 = getelementptr inbounds half, ptr addrspace(3) %c, i64 1
+  %i5 = load half, ptr addrspace(3) %arrayidx5, align 2
   %i3.fabs = call half @llvm.fabs.f16(half %i3)
 
   %fma1 = call half @llvm.fma.f16(half %i3.fabs, half %i4, half %i5)
-  store half %fma0, half addrspace(3)* %d, align 2
-  %arrayidx6 = getelementptr inbounds half, half addrspace(3)* %d, i64 1
-  store half %fma1, half addrspace(3)* %arrayidx6, align 2
+  store half %fma0, ptr addrspace(3) %d, align 2
+  %arrayidx6 = getelementptr inbounds half, ptr addrspace(3) %d, i64 1
+  store half %fma1, ptr addrspace(3) %arrayidx6, align 2
   ret void
 }
 
-define amdgpu_kernel void @test1_fabs_scalar_fma_v2f16(half addrspace(3)* %a, half addrspace(3)* %b, half addrspace(3)* %c, half addrspace(3)* %d) {
+define amdgpu_kernel void @test1_fabs_scalar_fma_v2f16(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d) {
 ; GCN-LABEL: @test1_fabs_scalar_fma_v2f16(
-; GCN-NEXT:    [[I1:%.*]] = load half, half addrspace(3)* [[B:%.*]], align 2
+; GCN-NEXT:    [[I1:%.*]] = load half, ptr addrspace(3) [[B:%.*]], align 2
 ; GCN-NEXT:    [[I1_FABS:%.*]] = call half @llvm.fabs.f16(half [[I1]])
-; GCN-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds half, half addrspace(3)* [[B]], i64 1
-; GCN-NEXT:    [[I4:%.*]] = load half, half addrspace(3)* [[ARRAYIDX4]], align 2
-; GCN-NEXT:    [[TMP1:%.*]] = bitcast half addrspace(3)* [[A:%.*]] to <2 x half> addrspace(3)*
-; GCN-NEXT:    [[TMP2:%.*]] = load <2 x half>, <2 x half> addrspace(3)* [[TMP1]], align 2
-; GCN-NEXT:    [[TMP3:%.*]] = bitcast half addrspace(3)* [[C:%.*]] to <2 x half> addrspace(3)*
-; GCN-NEXT:    [[TMP4:%.*]] = load <2 x half>, <2 x half> addrspace(3)* [[TMP3]], align 2
+; GCN-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[B]], i64 1
+; GCN-NEXT:    [[I4:%.*]] = load half, ptr addrspace(3) [[ARRAYIDX4]], align 2
+; GCN-NEXT:    [[TMP2:%.*]] = load <2 x half>, ptr addrspace(3) [[A:%.*]], align 2
+; GCN-NEXT:    [[TMP4:%.*]] = load <2 x half>, ptr addrspace(3) [[C:%.*]], align 2
 ; GCN-NEXT:    [[TMP5:%.*]] = insertelement <2 x half> poison, half [[I1_FABS]], i32 0
 ; GCN-NEXT:    [[TMP6:%.*]] = insertelement <2 x half> [[TMP5]], half [[I4]], i32 1
 ; GCN-NEXT:    [[TMP7:%.*]] = call <2 x half> @llvm.fma.v2f16(<2 x half> [[TMP2]], <2 x half> [[TMP6]], <2 x half> [[TMP4]])
-; GCN-NEXT:    [[TMP8:%.*]] = bitcast half addrspace(3)* [[D:%.*]] to <2 x half> addrspace(3)*
-; GCN-NEXT:    store <2 x half> [[TMP7]], <2 x half> addrspace(3)* [[TMP8]], align 2
+; GCN-NEXT:    store <2 x half> [[TMP7]], ptr addrspace(3) [[D:%.*]], align 2
 ; GCN-NEXT:    ret void
 ;
-  %i0 = load half, half addrspace(3)* %a, align 2
-  %i1 = load half, half addrspace(3)* %b, align 2
-  %i2 = load half, half addrspace(3)* %c, align 2
+  %i0 = load half, ptr addrspace(3) %a, align 2
+  %i1 = load half, ptr addrspace(3) %b, align 2
+  %i2 = load half, ptr addrspace(3) %c, align 2
   %i1.fabs = call half @llvm.fabs.f16(half %i1)
 
   %fma0 = call half @llvm.fma.f16(half %i0, half %i1.fabs, half %i2)
-  %arrayidx3 = getelementptr inbounds half, half addrspace(3)* %a, i64 1
-  %i3 = load half, half addrspace(3)* %arrayidx3, align 2
-  %arrayidx4 = getelementptr inbounds half, half addrspace(3)* %b, i64 1
-  %i4 = load half, half addrspace(3)* %arrayidx4, align 2
-  %arrayidx5 = getelementptr inbounds half, half addrspace(3)* %c, i64 1
-  %i5 = load half, half addrspace(3)* %arrayidx5, align 2
+  %arrayidx3 = getelementptr inbounds half, ptr addrspace(3) %a, i64 1
+  %i3 = load half, ptr addrspace(3) %arrayidx3, align 2
+  %arrayidx4 = getelementptr inbounds half, ptr addrspace(3) %b, i64 1
+  %i4 = load half, ptr addrspace(3) %arrayidx4, align 2
+  %arrayidx5 = getelementptr inbounds half, ptr addrspace(3) %c, i64 1
+  %i5 = load half, ptr addrspace(3) %arrayidx5, align 2
   %fma1 = call half @llvm.fma.f16(half %i3, half %i4, half %i5)
-  store half %fma0, half addrspace(3)* %d, align 2
-  %arrayidx6 = getelementptr inbounds half, half addrspace(3)* %d, i64 1
-  store half %fma1, half addrspace(3)* %arrayidx6, align 2
+  store half %fma0, ptr addrspace(3) %d, align 2
+  %arrayidx6 = getelementptr inbounds half, ptr addrspace(3) %d, i64 1
+  store half %fma1, ptr addrspace(3) %arrayidx6, align 2
   ret void
 }
 
-define amdgpu_kernel void @canonicalize_v2f16(half addrspace(3)* %a, half addrspace(3)* %c) {
+define amdgpu_kernel void @canonicalize_v2f16(ptr addrspace(3) %a, ptr addrspace(3) %c) {
 ; GFX9-LABEL: @canonicalize_v2f16(
-; GFX9-NEXT:    [[TMP1:%.*]] = bitcast half addrspace(3)* [[A:%.*]] to <2 x half> addrspace(3)*
-; GFX9-NEXT:    [[TMP2:%.*]] = load <2 x half>, <2 x half> addrspace(3)* [[TMP1]], align 2
+; GFX9-NEXT:    [[TMP2:%.*]] = load <2 x half>, ptr addrspace(3) [[A:%.*]], align 2
 ; GFX9-NEXT:    [[TMP3:%.*]] = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> [[TMP2]])
-; GFX9-NEXT:    [[TMP4:%.*]] = bitcast half addrspace(3)* [[C:%.*]] to <2 x half> addrspace(3)*
-; GFX9-NEXT:    store <2 x half> [[TMP3]], <2 x half> addrspace(3)* [[TMP4]], align 2
+; GFX9-NEXT:    store <2 x half> [[TMP3]], ptr addrspace(3) [[C:%.*]], align 2
 ; GFX9-NEXT:    ret void
 ;
 ; VI-LABEL: @canonicalize_v2f16(
-; VI-NEXT:    [[I0:%.*]] = load half, half addrspace(3)* [[A:%.*]], align 2
+; VI-NEXT:    [[I0:%.*]] = load half, ptr addrspace(3) [[A:%.*]], align 2
 ; VI-NEXT:    [[CANONICALIZE0:%.*]] = call half @llvm.canonicalize.f16(half [[I0]])
-; VI-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds half, half addrspace(3)* [[A]], i64 1
-; VI-NEXT:    [[I3:%.*]] = load half, half addrspace(3)* [[ARRAYIDX3]], align 2
+; VI-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[A]], i64 1
+; VI-NEXT:    [[I3:%.*]] = load half, ptr addrspace(3) [[ARRAYIDX3]], align 2
 ; VI-NEXT:    [[CANONICALIZE1:%.*]] = call half @llvm.canonicalize.f16(half [[I3]])
-; VI-NEXT:    store half [[CANONICALIZE0]], half addrspace(3)* [[C:%.*]], align 2
-; VI-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds half, half addrspace(3)* [[C]], i64 1
-; VI-NEXT:    store half [[CANONICALIZE1]], half addrspace(3)* [[ARRAYIDX5]], align 2
+; VI-NEXT:    store half [[CANONICALIZE0]], ptr addrspace(3) [[C:%.*]], align 2
+; VI-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[C]], i64 1
+; VI-NEXT:    store half [[CANONICALIZE1]], ptr addrspace(3) [[ARRAYIDX5]], align 2
 ; VI-NEXT:    ret void
 ;
-  %i0 = load half, half addrspace(3)* %a, align 2
+  %i0 = load half, ptr addrspace(3) %a, align 2
   %canonicalize0 = call half @llvm.canonicalize.f16(half %i0)
-  %arrayidx3 = getelementptr inbounds half, half addrspace(3)* %a, i64 1
-  %i3 = load half, half addrspace(3)* %arrayidx3, align 2
+  %arrayidx3 = getelementptr inbounds half, ptr addrspace(3) %a, i64 1
+  %i3 = load half, ptr addrspace(3) %arrayidx3, align 2
   %canonicalize1 = call half @llvm.canonicalize.f16(half %i3)
-  store half %canonicalize0, half addrspace(3)* %c, align 2
-  %arrayidx5 = getelementptr inbounds half, half addrspace(3)* %c, i64 1
-  store half %canonicalize1, half addrspace(3)* %arrayidx5, align 2
+  store half %canonicalize0, ptr addrspace(3) %c, align 2
+  %arrayidx5 = getelementptr inbounds half, ptr addrspace(3) %c, i64 1
+  store half %canonicalize1, ptr addrspace(3) %arrayidx5, align 2
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/slp-v2f32.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/slp-v2f32.ll
index 388a7063f031f..22b6fb4f0edca 100644
--- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/slp-v2f32.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/slp-v2f32.ll
@@ -5,19 +5,19 @@
 ; GFX908: fadd float
 ; GFX908: fadd float
 ; GFX90A: fadd <2 x float>
-define amdgpu_kernel void @fadd_combine(float addrspace(1)* %arg) {
+define amdgpu_kernel void @fadd_combine(ptr addrspace(1) %arg) {
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
   %tmp1 = zext i32 %tmp to i64
-  %tmp2 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp1
-  %tmp3 = load float, float addrspace(1)* %tmp2, align 4
+  %tmp2 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %tmp1
+  %tmp3 = load float, ptr addrspace(1) %tmp2, align 4
   %tmp4 = fadd float %tmp3, 1.000000e+00
-  store float %tmp4, float addrspace(1)* %tmp2, align 4
+  store float %tmp4, ptr addrspace(1) %tmp2, align 4
   %tmp5 = add nuw nsw i64 %tmp1, 1
-  %tmp6 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp5
-  %tmp7 = load float, float addrspace(1)* %tmp6, align 4
+  %tmp6 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %tmp5
+  %tmp7 = load float, ptr addrspace(1) %tmp6, align 4
   %tmp8 = fadd float %tmp7, 1.000000e+00
-  store float %tmp8, float addrspace(1)* %tmp6, align 4
+  store float %tmp8, ptr addrspace(1) %tmp6, align 4
   ret void
 }
 
@@ -25,19 +25,19 @@ bb:
 ; GFX908: fmul float
 ; GFX908: fmul float
 ; GFX90A: fmul <2 x float>
-define amdgpu_kernel void @fmul_combine(float addrspace(1)* %arg) {
+define amdgpu_kernel void @fmul_combine(ptr addrspace(1) %arg) {
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
   %tmp1 = zext i32 %tmp to i64
-  %tmp2 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp1
-  %tmp3 = load float, float addrspace(1)* %tmp2, align 4
+  %tmp2 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %tmp1
+  %tmp3 = load float, ptr addrspace(1) %tmp2, align 4
   %tmp4 = fmul float %tmp3, 1.000000e+00
-  store float %tmp4, float addrspace(1)* %tmp2, align 4
+  store float %tmp4, ptr addrspace(1) %tmp2, align 4
   %tmp5 = add nuw nsw i64 %tmp1, 1
-  %tmp6 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp5
-  %tmp7 = load float, float addrspace(1)* %tmp6, align 4
+  %tmp6 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %tmp5
+  %tmp7 = load float, ptr addrspace(1) %tmp6, align 4
   %tmp8 = fmul float %tmp7, 1.000000e+00
-  store float %tmp8, float addrspace(1)* %tmp6, align 4
+  store float %tmp8, ptr addrspace(1) %tmp6, align 4
   ret void
 }
 
@@ -45,19 +45,19 @@ bb:
 ; GFX908: call float @llvm.fma.f32
 ; GFX908: call float @llvm.fma.f32
 ; GFX90A: call <2 x float> @llvm.fma.v2f32
-define amdgpu_kernel void @fma_combine(float addrspace(1)* %arg) {
+define amdgpu_kernel void @fma_combine(ptr addrspace(1) %arg) {
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
   %tmp1 = zext i32 %tmp to i64
-  %tmp2 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp1
-  %tmp3 = load float, float addrspace(1)* %tmp2, align 4
+  %tmp2 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %tmp1
+  %tmp3 = load float, ptr addrspace(1) %tmp2, align 4
   %tmp4 = tail call float @llvm.fma.f32(float %tmp3, float 1.000000e+00, float 1.000000e+00)
-  store float %tmp4, float addrspace(1)* %tmp2, align 4
+  store float %tmp4, ptr addrspace(1) %tmp2, align 4
   %tmp5 = add nuw nsw i64 %tmp1, 1
-  %tmp6 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp5
-  %tmp7 = load float, float addrspace(1)* %tmp6, align 4
+  %tmp6 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %tmp5
+  %tmp7 = load float, ptr addrspace(1) %tmp6, align 4
   %tmp8 = tail call float @llvm.fma.f32(float %tmp7, float 1.000000e+00, float 1.000000e+00)
-  store float %tmp8, float addrspace(1)* %tmp6, align 4
+  store float %tmp8, ptr addrspace(1) %tmp6, align 4
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/ARM/memory.ll b/llvm/test/Transforms/SLPVectorizer/ARM/memory.ll
index 80b1bdde47a8e..e2626c4efc1ad 100644
--- a/llvm/test/Transforms/SLPVectorizer/ARM/memory.ll
+++ b/llvm/test/Transforms/SLPVectorizer/ARM/memory.ll
@@ -6,23 +6,23 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-
 ; On Swift, unaligned <2 x double> stores need 4 uops, and it is therefore
 ; cheaper to do this as scalars.
 
-define void @expensive_double_store(double* noalias %dst, double* noalias %src, i64 %count) {
+define void @expensive_double_store(ptr noalias %dst, ptr noalias %src, i64 %count) {
 ; CHECK-LABEL: @expensive_double_store(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load double, double* [[SRC:%.*]], align 8
-; CHECK-NEXT:    store double [[TMP0]], double* [[DST:%.*]], align 8
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[SRC]], i64 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load double, double* [[ARRAYIDX2]], align 8
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[DST]], i64 1
-; CHECK-NEXT:    store double [[TMP1]], double* [[ARRAYIDX3]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[SRC:%.*]], align 8
+; CHECK-NEXT:    store double [[TMP0]], ptr [[DST:%.*]], align 8
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[SRC]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 1
+; CHECK-NEXT:    store double [[TMP1]], ptr [[ARRAYIDX3]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load double, double* %src, align 8
-  store double %0, double* %dst, align 8
-  %arrayidx2 = getelementptr inbounds double, double* %src, i64 1
-  %1 = load double, double* %arrayidx2, align 8
-  %arrayidx3 = getelementptr inbounds double, double* %dst, i64 1
-  store double %1, double* %arrayidx3, align 8
+  %0 = load double, ptr %src, align 8
+  store double %0, ptr %dst, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %src, i64 1
+  %1 = load double, ptr %arrayidx2, align 8
+  %arrayidx3 = getelementptr inbounds double, ptr %dst, i64 1
+  store double %1, ptr %arrayidx3, align 8
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll b/llvm/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll
index e77f8e9edfb96..712e847a49b17 100644
--- a/llvm/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll
+++ b/llvm/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll
@@ -2,22 +2,18 @@
 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=nvptx64-nvidia-cuda -mcpu=sm_70 | FileCheck %s
 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=nvptx64-nvidia-cuda -mcpu=sm_40 | FileCheck %s -check-prefix=NOVECTOR
 
-define void @fusion(i8* noalias nocapture align 256 dereferenceable(19267584) %arg, i8* noalias nocapture readonly align 256 dereferenceable(19267584) %arg1, i32 %arg2, i32 %arg3) local_unnamed_addr #0 {
+define void @fusion(ptr noalias nocapture align 256 dereferenceable(19267584) %arg, ptr noalias nocapture readonly align 256 dereferenceable(19267584) %arg1, i32 %arg2, i32 %arg3) local_unnamed_addr #0 {
 ; CHECK-LABEL: @fusion(
 ; CHECK-NEXT:    [[TMP:%.*]] = shl nuw nsw i32 [[ARG2:%.*]], 6
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP]], [[ARG3:%.*]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = shl nuw nsw i32 [[TMP4]], 2
 ; CHECK-NEXT:    [[TMP6:%.*]] = zext i32 [[TMP5]] to i64
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8* [[ARG1:%.*]] to half*
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds half, half* [[TMP10]], i64 [[TMP6]]
-; CHECK-NEXT:    [[TMP15:%.*]] = bitcast i8* [[ARG:%.*]] to half*
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds half, half* [[TMP15]], i64 [[TMP6]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[TMP11]] to <2 x half>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x half>, <2 x half>* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds half, ptr [[ARG1:%.*]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds half, ptr [[ARG:%.*]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x half>, ptr [[TMP11]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <2 x half> [[TMP2]], <half 0xH5380, half 0xH5380>
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast <2 x half> [[TMP3]], <half 0xH57F0, half 0xH57F0>
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast half* [[TMP16]] to <2 x half>*
-; CHECK-NEXT:    store <2 x half> [[TMP4]], <2 x half>* [[TMP5]], align 8
+; CHECK-NEXT:    store <2 x half> [[TMP4]], ptr [[TMP16]], align 8
 ; CHECK-NEXT:    ret void
 ;
 ; NOVECTOR-LABEL: @fusion(
@@ -26,20 +22,18 @@ define void @fusion(i8* noalias nocapture align 256 dereferenceable(19267584) %a
 ; NOVECTOR-NEXT:    [[TMP5:%.*]] = shl nuw nsw i32 [[TMP4]], 2
 ; NOVECTOR-NEXT:    [[TMP6:%.*]] = zext i32 [[TMP5]] to i64
 ; NOVECTOR-NEXT:    [[TMP7:%.*]] = or i64 [[TMP6]], 1
-; NOVECTOR-NEXT:    [[TMP10:%.*]] = bitcast i8* [[ARG1:%.*]] to half*
-; NOVECTOR-NEXT:    [[TMP11:%.*]] = getelementptr inbounds half, half* [[TMP10]], i64 [[TMP6]]
-; NOVECTOR-NEXT:    [[TMP12:%.*]] = load half, half* [[TMP11]], align 8
+; NOVECTOR-NEXT:    [[TMP11:%.*]] = getelementptr inbounds half, ptr [[ARG1:%.*]], i64 [[TMP6]]
+; NOVECTOR-NEXT:    [[TMP12:%.*]] = load half, ptr [[TMP11]], align 8
 ; NOVECTOR-NEXT:    [[TMP13:%.*]] = fmul fast half [[TMP12]], 0xH5380
 ; NOVECTOR-NEXT:    [[TMP14:%.*]] = fadd fast half [[TMP13]], 0xH57F0
-; NOVECTOR-NEXT:    [[TMP15:%.*]] = bitcast i8* [[ARG:%.*]] to half*
-; NOVECTOR-NEXT:    [[TMP16:%.*]] = getelementptr inbounds half, half* [[TMP15]], i64 [[TMP6]]
-; NOVECTOR-NEXT:    store half [[TMP14]], half* [[TMP16]], align 8
-; NOVECTOR-NEXT:    [[TMP17:%.*]] = getelementptr inbounds half, half* [[TMP10]], i64 [[TMP7]]
-; NOVECTOR-NEXT:    [[TMP18:%.*]] = load half, half* [[TMP17]], align 2
+; NOVECTOR-NEXT:    [[TMP16:%.*]] = getelementptr inbounds half, ptr [[ARG:%.*]], i64 [[TMP6]]
+; NOVECTOR-NEXT:    store half [[TMP14]], ptr [[TMP16]], align 8
+; NOVECTOR-NEXT:    [[TMP17:%.*]] = getelementptr inbounds half, ptr [[ARG1]], i64 [[TMP7]]
+; NOVECTOR-NEXT:    [[TMP18:%.*]] = load half, ptr [[TMP17]], align 2
 ; NOVECTOR-NEXT:    [[TMP19:%.*]] = fmul fast half [[TMP18]], 0xH5380
 ; NOVECTOR-NEXT:    [[TMP20:%.*]] = fadd fast half [[TMP19]], 0xH57F0
-; NOVECTOR-NEXT:    [[TMP21:%.*]] = getelementptr inbounds half, half* [[TMP15]], i64 [[TMP7]]
-; NOVECTOR-NEXT:    store half [[TMP20]], half* [[TMP21]], align 2
+; NOVECTOR-NEXT:    [[TMP21:%.*]] = getelementptr inbounds half, ptr [[ARG]], i64 [[TMP7]]
+; NOVECTOR-NEXT:    store half [[TMP20]], ptr [[TMP21]], align 2
 ; NOVECTOR-NEXT:    ret void
 ;
   %tmp = shl nuw nsw i32 %arg2, 6
@@ -47,20 +41,18 @@ define void @fusion(i8* noalias nocapture align 256 dereferenceable(19267584) %a
   %tmp5 = shl nuw nsw i32 %tmp4, 2
   %tmp6 = zext i32 %tmp5 to i64
   %tmp7 = or i64 %tmp6, 1
-  %tmp10 = bitcast i8* %arg1 to half*
-  %tmp11 = getelementptr inbounds half, half* %tmp10, i64 %tmp6
-  %tmp12 = load half, half* %tmp11, align 8
+  %tmp11 = getelementptr inbounds half, ptr %arg1, i64 %tmp6
+  %tmp12 = load half, ptr %tmp11, align 8
   %tmp13 = fmul fast half %tmp12, 0xH5380
   %tmp14 = fadd fast half %tmp13, 0xH57F0
-  %tmp15 = bitcast i8* %arg to half*
-  %tmp16 = getelementptr inbounds half, half* %tmp15, i64 %tmp6
-  store half %tmp14, half* %tmp16, align 8
-  %tmp17 = getelementptr inbounds half, half* %tmp10, i64 %tmp7
-  %tmp18 = load half, half* %tmp17, align 2
+  %tmp16 = getelementptr inbounds half, ptr %arg, i64 %tmp6
+  store half %tmp14, ptr %tmp16, align 8
+  %tmp17 = getelementptr inbounds half, ptr %arg1, i64 %tmp7
+  %tmp18 = load half, ptr %tmp17, align 2
   %tmp19 = fmul fast half %tmp18, 0xH5380
   %tmp20 = fadd fast half %tmp19, 0xH57F0
-  %tmp21 = getelementptr inbounds half, half* %tmp15, i64 %tmp7
-  store half %tmp20, half* %tmp21, align 2
+  %tmp21 = getelementptr inbounds half, ptr %arg, i64 %tmp7
+  store half %tmp20, ptr %tmp21, align 2
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/PowerPC/aggregate.ll b/llvm/test/Transforms/SLPVectorizer/PowerPC/aggregate.ll
index 34aa5067d0dd3..ed1cdddc4a84b 100644
--- a/llvm/test/Transforms/SLPVectorizer/PowerPC/aggregate.ll
+++ b/llvm/test/Transforms/SLPVectorizer/PowerPC/aggregate.ll
@@ -1,22 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -mtriple=powerpc64-linux-gnu -mcpu=pwr9 -mattr=+vsx -passes=slp-vectorizer < %s | FileCheck %s
 
-%struct.S = type { i8*, i8* }
+%struct.S = type { ptr, ptr }
 
 @kS0 = common global %struct.S zeroinitializer, align 8
 
 define { i64, i64 } @getS() {
 ; CHECK-LABEL: @getS(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* bitcast (%struct.S* @kS0 to i64*), align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* bitcast (i8** getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* @kS0, i64 0, i32 1) to i64*), align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @kS0, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr getelementptr inbounds ([[STRUCT_S:%.*]], ptr @kS0, i64 0, i32 1), align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP0]], 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { i64, i64 } [[TMP2]], i64 [[TMP1]], 1
 ; CHECK-NEXT:    ret { i64, i64 } [[TMP3]]
 ;
 entry:
-  %0 = load i64, i64* bitcast (%struct.S* @kS0 to i64*), align 8
-  %1 = load i64, i64* bitcast (i8** getelementptr inbounds (%struct.S, %struct.S* @kS0, i64 0, i32 1) to i64*), align 8
+  %0 = load i64, ptr @kS0, align 8
+  %1 = load i64, ptr getelementptr inbounds (%struct.S, ptr @kS0, i64 0, i32 1), align 8
   %2 = insertvalue { i64, i64 } undef, i64 %0, 0
   %3 = insertvalue { i64, i64 } %2, i64 %1, 1
   ret { i64, i64 } %3

diff --git a/llvm/test/Transforms/SLPVectorizer/PowerPC/pr27897.ll b/llvm/test/Transforms/SLPVectorizer/PowerPC/pr27897.ll
index 5eabd7c8800e0..ae130af0bc0c5 100644
--- a/llvm/test/Transforms/SLPVectorizer/PowerPC/pr27897.ll
+++ b/llvm/test/Transforms/SLPVectorizer/PowerPC/pr27897.ll
@@ -1,42 +1,38 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -mtriple=powerpc64-linux-gnu -mcpu=pwr8 -mattr=+vsx -passes=slp-vectorizer < %s | FileCheck %s
 
-%struct.A = type { i8*, i8* }
+%struct.A = type { ptr, ptr }
 
-define i64 @foo(%struct.A* nocapture readonly %this) {
+define i64 @foo(ptr nocapture readonly %this) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[END_I:%.*]] = getelementptr inbounds [[STRUCT_A:%.*]], %struct.A* [[THIS:%.*]], i64 0, i32 1
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8** [[END_I]] to i64*
-; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast %struct.A* [[THIS]] to i64*
-; CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP2]], align 8
+; CHECK-NEXT:    [[END_I:%.*]] = getelementptr inbounds [[STRUCT_A:%.*]], ptr [[THIS:%.*]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[END_I]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load i64, ptr [[THIS]], align 8
 ; CHECK-NEXT:    [[SUB_PTR_SUB_I:%.*]] = sub i64 [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[SUB_PTR_SUB_I]], 9
 ; CHECK-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[LOR_LHS_FALSE:%.*]]
 ; CHECK:       lor.lhs.false:
-; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to i8*
-; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP1]] to i8*
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp ugt i8* [[TMP5]], [[TMP4]]
+; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP1]] to ptr
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ugt ptr [[TMP5]], [[TMP4]]
 ; CHECK-NEXT:    [[DOT:%.*]] = select i1 [[CMP2]], i64 2, i64 -1
 ; CHECK-NEXT:    ret i64 [[DOT]]
 ; CHECK:       return:
 ; CHECK-NEXT:    ret i64 2
 ;
 entry:
-  %end.i = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 1
-  %0 = bitcast i8** %end.i to i64*
-  %1 = load i64, i64* %0, align 8
-  %2 = bitcast %struct.A* %this to i64*
-  %3 = load i64, i64* %2, align 8
-  %sub.ptr.sub.i = sub i64 %1, %3
+  %end.i = getelementptr inbounds %struct.A, ptr %this, i64 0, i32 1
+  %0 = load i64, ptr %end.i, align 8
+  %1 = load i64, ptr %this, align 8
+  %sub.ptr.sub.i = sub i64 %0, %1
   %cmp = icmp sgt i64 %sub.ptr.sub.i, 9
   br i1 %cmp, label %return, label %lor.lhs.false
 
 lor.lhs.false:
-  %4 = inttoptr i64 %3 to i8*
-  %5 = inttoptr i64 %1 to i8*
-  %cmp2 = icmp ugt i8* %5, %4
+  %2 = inttoptr i64 %1 to ptr
+  %3 = inttoptr i64 %0 to ptr
+  %cmp2 = icmp ugt ptr %3, %2
   %. = select i1 %cmp2, i64 2, i64 -1
   ret i64 %.
 

diff --git a/llvm/test/Transforms/SLPVectorizer/PowerPC/short-to-double.ll b/llvm/test/Transforms/SLPVectorizer/PowerPC/short-to-double.ll
index 13428342995e5..e12bcdcd51abf 100644
--- a/llvm/test/Transforms/SLPVectorizer/PowerPC/short-to-double.ll
+++ b/llvm/test/Transforms/SLPVectorizer/PowerPC/short-to-double.ll
@@ -4,7 +4,7 @@
 %struct._pp = type { i16, i16, i16, i16 }
 
 ; Function Attrs: norecurse nounwind readonly
-define [5 x double] @foo(double %k, i64 %n, %struct._pp* nocapture readonly %p) local_unnamed_addr #0 {
+define [5 x double] @foo(double %k, i64 %n, ptr nocapture readonly %p) local_unnamed_addr #0 {
 entry:
   %cmp17 = icmp sgt i64 %n, 0
   br i1 %cmp17, label %for.body, label %for.cond.cleanup
@@ -20,13 +20,13 @@ for.body:                                         ; preds = %entry, %for.body
   %i.020 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
   %retval.sroa.4.019 = phi double [ %add10, %for.body ], [ 0.000000e+00, %entry ]
   %retval.sroa.0.018 = phi double [ %add, %for.body ], [ 0.000000e+00, %entry ]
-  %r1 = getelementptr inbounds %struct._pp, %struct._pp* %p, i64 %i.020, i32 2
-  %0 = load i16, i16* %r1, align 2
+  %r1 = getelementptr inbounds %struct._pp, ptr %p, i64 %i.020, i32 2
+  %0 = load i16, ptr %r1, align 2
   %conv2 = uitofp i16 %0 to double
   %mul = fmul double %conv2, %k
   %add = fadd double %retval.sroa.0.018, %mul
-  %g5 = getelementptr inbounds %struct._pp, %struct._pp* %p, i64 %i.020, i32 1
-  %1 = load i16, i16* %g5, align 2
+  %g5 = getelementptr inbounds %struct._pp, ptr %p, i64 %i.020, i32 1
+  %1 = load i16, ptr %g5, align 2
   %conv7 = uitofp i16 %1 to double
   %mul8 = fmul double %conv7, %k
   %add10 = fadd double %retval.sroa.4.019, %mul8

diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll
index bc53195847f5d..cb017795077f1 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll
@@ -9,54 +9,52 @@
 target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
 target triple = "riscv64"
 
-define void @foo(i64* nocapture writeonly %da) {
+define void @foo(ptr nocapture writeonly %da) {
 ; CHECK-128-LABEL: @foo(
 ; CHECK-128-NEXT:  entry:
-; CHECK-128-NEXT:    store i64 0, i64* [[DA:%.*]], align 8
-; CHECK-128-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[DA]], i64 1
-; CHECK-128-NEXT:    store i64 0, i64* [[ARRAYIDX1]], align 8
-; CHECK-128-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[DA]], i64 2
-; CHECK-128-NEXT:    store i64 0, i64* [[ARRAYIDX2]], align 8
-; CHECK-128-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[DA]], i64 3
-; CHECK-128-NEXT:    store i64 0, i64* [[ARRAYIDX3]], align 8
+; CHECK-128-NEXT:    store i64 0, ptr [[DA:%.*]], align 8
+; CHECK-128-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[DA]], i64 1
+; CHECK-128-NEXT:    store i64 0, ptr [[ARRAYIDX1]], align 8
+; CHECK-128-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[DA]], i64 2
+; CHECK-128-NEXT:    store i64 0, ptr [[ARRAYIDX2]], align 8
+; CHECK-128-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, ptr [[DA]], i64 3
+; CHECK-128-NEXT:    store i64 0, ptr [[ARRAYIDX3]], align 8
 ; CHECK-128-NEXT:    ret void
 ;
 ; CHECK-256-LABEL: @foo(
 ; CHECK-256-NEXT:  entry:
-; CHECK-256-NEXT:    [[TMP0:%.*]] = bitcast i64* [[DA:%.*]] to <4 x i64>*
-; CHECK-256-NEXT:    store <4 x i64> zeroinitializer, <4 x i64>* [[TMP0]], align 8
+; CHECK-256-NEXT:    store <4 x i64> zeroinitializer, ptr [[DA:%.*]], align 8
 ; CHECK-256-NEXT:    ret void
 ;
 ; CHECK-512-LABEL: @foo(
 ; CHECK-512-NEXT:  entry:
-; CHECK-512-NEXT:    [[TMP0:%.*]] = bitcast i64* [[DA:%.*]] to <4 x i64>*
-; CHECK-512-NEXT:    store <4 x i64> zeroinitializer, <4 x i64>* [[TMP0]], align 8
+; CHECK-512-NEXT:    store <4 x i64> zeroinitializer, ptr [[DA:%.*]], align 8
 ; CHECK-512-NEXT:    ret void
 ;
 entry:
-  store i64 0, i64* %da, align 8
-  %arrayidx1 = getelementptr inbounds i64, i64* %da, i64 1
-  store i64 0, i64* %arrayidx1, align 8
-  %arrayidx2 = getelementptr inbounds i64, i64* %da, i64 2
-  store i64 0, i64* %arrayidx2, align 8
-  %arrayidx3 = getelementptr inbounds i64, i64* %da, i64 3
-  store i64 0, i64* %arrayidx3, align 8
+  store i64 0, ptr %da, align 8
+  %arrayidx1 = getelementptr inbounds i64, ptr %da, i64 1
+  store i64 0, ptr %arrayidx1, align 8
+  %arrayidx2 = getelementptr inbounds i64, ptr %da, i64 2
+  store i64 0, ptr %arrayidx2, align 8
+  %arrayidx3 = getelementptr inbounds i64, ptr %da, i64 3
+  store i64 0, ptr %arrayidx3, align 8
   ret void
 }
 
-define void @foo8(i8* nocapture writeonly %da) {
+define void @foo8(ptr nocapture writeonly %da) {
 ; CHECK-LABEL: @foo8(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    store i8 0, i8* [[DA:%.*]], align 8
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[DA]], i8 1
-; CHECK-NEXT:    store i8 0, i8* [[ARRAYIDX1]], align 8
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[DA]], i8 2
+; CHECK-NEXT:    store i8 0, ptr [[DA:%.*]], align 8
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DA]], i8 1
+; CHECK-NEXT:    store i8 0, ptr [[ARRAYIDX1]], align 8
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[DA]], i8 2
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  store i8 0, i8* %da, align 8
-  %arrayidx1 = getelementptr inbounds i8, i8* %da, i8 1
-  store i8 0, i8* %arrayidx1, align 8
-  %arrayidx2 = getelementptr inbounds i8, i8* %da, i8 2
+  store i8 0, ptr %da, align 8
+  %arrayidx1 = getelementptr inbounds i8, ptr %da, i8 1
+  store i8 0, ptr %arrayidx1, align 8
+  %arrayidx2 = getelementptr inbounds i8, ptr %da, i8 2
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/SLP-cmp-cost-query.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/SLP-cmp-cost-query.ll
index 9ad7ad0cdcc8e..b03c8788c2b63 100644
--- a/llvm/test/Transforms/SLPVectorizer/SystemZ/SLP-cmp-cost-query.ll
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/SLP-cmp-cost-query.ll
@@ -6,7 +6,7 @@
 ; node.
 
 ; Function Attrs: norecurse nounwind readonly
-define void @fun(i8* nocapture, i32 zeroext) local_unnamed_addr #0 {
+define void @fun(ptr nocapture, i32 zeroext) local_unnamed_addr #0 {
 .lr.ph.preheader:
   br label %.lr.ph
 
@@ -20,11 +20,11 @@ define void @fun(i8* nocapture, i32 zeroext) local_unnamed_addr #0 {
   %7 = select i1 %6, i32 0, i32 %1
   %.9 = sub i32 %3, %7
   %8 = zext i32 %. to i64
-  %9 = getelementptr inbounds i8, i8* %0, i64 %8
-  %10 = load i8, i8* %9, align 1
+  %9 = getelementptr inbounds i8, ptr %0, i64 %8
+  %10 = load i8, ptr %9, align 1
   %11 = zext i32 %.9 to i64
-  %12 = getelementptr inbounds i8, i8* %0, i64 %11
-  %13 = load i8, i8* %12, align 1
+  %12 = getelementptr inbounds i8, ptr %0, i64 %11
+  %13 = load i8, ptr %12, align 1
   %14 = icmp eq i8 %10, %13
   br i1 %14, label %.lr.ph, label %._crit_edge
 

diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
index b5538a539a091..750dac29a57e1 100644
--- a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
@@ -8,41 +8,40 @@ define void @foo() local_unnamed_addr {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ADD277:%.*]] = add nsw i32 undef, undef
-; CHECK-NEXT:    store i32 [[ADD277]], i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 1), align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 0), align 4
-; CHECK-NEXT:    [[ARRAYIDX372:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 2) to <2 x i32>*), align 4
+; CHECK-NEXT:    store i32 [[ADD277]], ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 1), align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 0), align 4
+; CHECK-NEXT:    [[ARRAYIDX372:%.*]] = getelementptr inbounds [4 x [4 x i32]], ptr @dct_luma, i64 0, i64 3, i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 2), align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ADD277]], i32 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> undef, [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = ashr <4 x i32> [[TMP6]], <i32 6, i32 6, i32 6, i32 6>
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32* [[ARRAYIDX372]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP7]], ptr [[ARRAYIDX372]], align 4
 ; CHECK-NEXT:    unreachable
 ;
 entry:
   %add277 = add nsw i32 undef, undef
-  store i32 %add277, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 1), align 4
-  %0 = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 0), align 4
+  store i32 %add277, ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 1), align 4
+  %0 = load i32, ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 0), align 4
   %sub355 = add nsw i32 undef, %0
   %shr.i = ashr i32 %sub355, 6
-  %arrayidx372 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 0
-  store i32 %shr.i, i32* %arrayidx372, align 4
+  %arrayidx372 = getelementptr inbounds [4 x [4 x i32]], ptr @dct_luma, i64 0, i64 3, i64 0
+  store i32 %shr.i, ptr %arrayidx372, align 4
   %sub355.1 = add nsw i32 undef, %add277
   %shr.i.1 = ashr i32 %sub355.1, 6
-  %arrayidx372.1 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 1
-  store i32 %shr.i.1, i32* %arrayidx372.1, align 4
-  %1 = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 2), align 4
+  %arrayidx372.1 = getelementptr inbounds [4 x [4 x i32]], ptr @dct_luma, i64 0, i64 3, i64 1
+  store i32 %shr.i.1, ptr %arrayidx372.1, align 4
+  %1 = load i32, ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 2), align 4
   %sub355.2 = add nsw i32 undef, %1
   %shr.i.2 = ashr i32 %sub355.2, 6
-  %arrayidx372.2 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 2
-  store i32 %shr.i.2, i32* %arrayidx372.2, align 4
-  %2 = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 3), align 4
+  %arrayidx372.2 = getelementptr inbounds [4 x [4 x i32]], ptr @dct_luma, i64 0, i64 3, i64 2
+  store i32 %shr.i.2, ptr %arrayidx372.2, align 4
+  %2 = load i32, ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 3), align 4
   %sub355.3 = add nsw i32 undef, %2
   %shr.i.3 = ashr i32 %sub355.3, 6
-  %arrayidx372.3 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 3
-  store i32 %shr.i.3, i32* %arrayidx372.3, align 4
+  %arrayidx372.3 = getelementptr inbounds [4 x [4 x i32]], ptr @dct_luma, i64 0, i64 3, i64 3
+  store i32 %shr.i.3, ptr %arrayidx372.3, align 4
   unreachable
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/VE/disable_slp.ll b/llvm/test/Transforms/SLPVectorizer/VE/disable_slp.ll
index 8ac8c34b85f35..c1868c54f268d 100644
--- a/llvm/test/Transforms/SLPVectorizer/VE/disable_slp.ll
+++ b/llvm/test/Transforms/SLPVectorizer/VE/disable_slp.ll
@@ -9,69 +9,69 @@
 ; VE-NOT: x double
 ; SSE: x double
 
-define void @foo(double* noalias %A0p, double* noalias %B0p) {
+define void @foo(ptr noalias %A0p, ptr noalias %B0p) {
 entry:
-  %A1p = getelementptr inbounds double, double* %A0p, i64 1
-  %A2p = getelementptr inbounds double, double* %A0p, i64 2
-  %A3p = getelementptr inbounds double, double* %A0p, i64 3
-  %A4p = getelementptr inbounds double, double* %A0p, i64 4
-  %A5p = getelementptr inbounds double, double* %A0p, i64 5
-  %A6p = getelementptr inbounds double, double* %A0p, i64 6
-  %A7p = getelementptr inbounds double, double* %A0p, i64 7
-  %A8p = getelementptr inbounds double, double* %A0p, i64 8
-  %A9p = getelementptr inbounds double, double* %A0p, i64 9
-  %A10p = getelementptr inbounds double, double* %A0p, i64 10
-  %A11p = getelementptr inbounds double, double* %A0p, i64 11
-  %A12p = getelementptr inbounds double, double* %A0p, i64 12
-  %A13p = getelementptr inbounds double, double* %A0p, i64 13
-  %A14p = getelementptr inbounds double, double* %A0p, i64 14
-  %A15p = getelementptr inbounds double, double* %A0p, i64 15
-  %A0 = load double, double* %A0p, align 8
-  %A1 = load double, double* %A1p, align 8
-  %A2 = load double, double* %A2p, align 8
-  %A3 = load double, double* %A3p, align 8
-  %A4 = load double, double* %A4p, align 8
-  %A5 = load double, double* %A5p, align 8
-  %A6 = load double, double* %A6p, align 8
-  %A7 = load double, double* %A7p, align 8
-  %A8 = load double, double* %A8p, align 8
-  %A9 = load double, double* %A9p, align 8
-  %A10 = load double, double* %A10p, align 8
-  %A11 = load double, double* %A11p, align 8
-  %A12 = load double, double* %A12p, align 8
-  %A13 = load double, double* %A13p, align 8
-  %A14 = load double, double* %A14p, align 8
-  %A15 = load double, double* %A15p, align 8
-  %B1p = getelementptr inbounds double, double* %B0p, i64 1
-  %B2p = getelementptr inbounds double, double* %B0p, i64 2
-  %B3p = getelementptr inbounds double, double* %B0p, i64 3
-  %B4p = getelementptr inbounds double, double* %B0p, i64 4
-  %B5p = getelementptr inbounds double, double* %B0p, i64 5
-  %B6p = getelementptr inbounds double, double* %B0p, i64 6
-  %B7p = getelementptr inbounds double, double* %B0p, i64 7
-  %B8p = getelementptr inbounds double, double* %B0p, i64 8
-  %B9p = getelementptr inbounds double, double* %B0p, i64 9
-  %B10p = getelementptr inbounds double, double* %B0p, i64 10
-  %B11p = getelementptr inbounds double, double* %B0p, i64 11
-  %B12p = getelementptr inbounds double, double* %B0p, i64 12
-  %B13p = getelementptr inbounds double, double* %B0p, i64 13
-  %B14p = getelementptr inbounds double, double* %B0p, i64 14
-  %B15p = getelementptr inbounds double, double* %B0p, i64 15
-  store double %A0, double* %B0p, align 8
-  store double %A1, double* %B1p, align 8
-  store double %A2, double* %B2p, align 8
-  store double %A3, double* %B3p, align 8
-  store double %A4, double* %B4p, align 8
-  store double %A5, double* %B5p, align 8
-  store double %A6, double* %B6p, align 8
-  store double %A7, double* %B7p, align 8
-  store double %A8, double* %B8p, align 8
-  store double %A9, double* %B9p, align 8
-  store double %A10, double* %B10p, align 8
-  store double %A11, double* %B11p, align 8
-  store double %A12, double* %B12p, align 8
-  store double %A13, double* %B13p, align 8
-  store double %A14, double* %B14p, align 8
-  store double %A15, double* %B15p, align 8
+  %A1p = getelementptr inbounds double, ptr %A0p, i64 1
+  %A2p = getelementptr inbounds double, ptr %A0p, i64 2
+  %A3p = getelementptr inbounds double, ptr %A0p, i64 3
+  %A4p = getelementptr inbounds double, ptr %A0p, i64 4
+  %A5p = getelementptr inbounds double, ptr %A0p, i64 5
+  %A6p = getelementptr inbounds double, ptr %A0p, i64 6
+  %A7p = getelementptr inbounds double, ptr %A0p, i64 7
+  %A8p = getelementptr inbounds double, ptr %A0p, i64 8
+  %A9p = getelementptr inbounds double, ptr %A0p, i64 9
+  %A10p = getelementptr inbounds double, ptr %A0p, i64 10
+  %A11p = getelementptr inbounds double, ptr %A0p, i64 11
+  %A12p = getelementptr inbounds double, ptr %A0p, i64 12
+  %A13p = getelementptr inbounds double, ptr %A0p, i64 13
+  %A14p = getelementptr inbounds double, ptr %A0p, i64 14
+  %A15p = getelementptr inbounds double, ptr %A0p, i64 15
+  %A0 = load double, ptr %A0p, align 8
+  %A1 = load double, ptr %A1p, align 8
+  %A2 = load double, ptr %A2p, align 8
+  %A3 = load double, ptr %A3p, align 8
+  %A4 = load double, ptr %A4p, align 8
+  %A5 = load double, ptr %A5p, align 8
+  %A6 = load double, ptr %A6p, align 8
+  %A7 = load double, ptr %A7p, align 8
+  %A8 = load double, ptr %A8p, align 8
+  %A9 = load double, ptr %A9p, align 8
+  %A10 = load double, ptr %A10p, align 8
+  %A11 = load double, ptr %A11p, align 8
+  %A12 = load double, ptr %A12p, align 8
+  %A13 = load double, ptr %A13p, align 8
+  %A14 = load double, ptr %A14p, align 8
+  %A15 = load double, ptr %A15p, align 8
+  %B1p = getelementptr inbounds double, ptr %B0p, i64 1
+  %B2p = getelementptr inbounds double, ptr %B0p, i64 2
+  %B3p = getelementptr inbounds double, ptr %B0p, i64 3
+  %B4p = getelementptr inbounds double, ptr %B0p, i64 4
+  %B5p = getelementptr inbounds double, ptr %B0p, i64 5
+  %B6p = getelementptr inbounds double, ptr %B0p, i64 6
+  %B7p = getelementptr inbounds double, ptr %B0p, i64 7
+  %B8p = getelementptr inbounds double, ptr %B0p, i64 8
+  %B9p = getelementptr inbounds double, ptr %B0p, i64 9
+  %B10p = getelementptr inbounds double, ptr %B0p, i64 10
+  %B11p = getelementptr inbounds double, ptr %B0p, i64 11
+  %B12p = getelementptr inbounds double, ptr %B0p, i64 12
+  %B13p = getelementptr inbounds double, ptr %B0p, i64 13
+  %B14p = getelementptr inbounds double, ptr %B0p, i64 14
+  %B15p = getelementptr inbounds double, ptr %B0p, i64 15
+  store double %A0, ptr %B0p, align 8
+  store double %A1, ptr %B1p, align 8
+  store double %A2, ptr %B2p, align 8
+  store double %A3, ptr %B3p, align 8
+  store double %A4, ptr %B4p, align 8
+  store double %A5, ptr %B5p, align 8
+  store double %A6, ptr %B6p, align 8
+  store double %A7, ptr %B7p, align 8
+  store double %A8, ptr %B8p, align 8
+  store double %A9, ptr %B9p, align 8
+  store double %A10, ptr %B10p, align 8
+  store double %A11, ptr %B11p, align 8
+  store double %A12, ptr %B12p, align 8
+  store double %A13, ptr %B13p, align 8
+  store double %A14, ptr %B14p, align 8
+  store double %A15, ptr %B15p, align 8
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/WebAssembly/no-vectorize-rotate.ll b/llvm/test/Transforms/SLPVectorizer/WebAssembly/no-vectorize-rotate.ll
index 6491644bb03e1..24365a850d91d 100644
--- a/llvm/test/Transforms/SLPVectorizer/WebAssembly/no-vectorize-rotate.ll
+++ b/llvm/test/Transforms/SLPVectorizer/WebAssembly/no-vectorize-rotate.ll
@@ -7,32 +7,32 @@
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
 
-define void @foo(<2 x i64> %x, <4 x i32> %y, i64* %out) #0 {
+define void @foo(<2 x i64> %x, <4 x i32> %y, ptr %out) #0 {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:    [[A:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[B:%.*]] = extractelement <4 x i32> [[Y:%.*]], i64 2
 ; CHECK-NEXT:    [[CONV6:%.*]] = zext i32 [[B]] to i64
 ; CHECK-NEXT:    [[C:%.*]] = tail call i64 @llvm.fshl.i64(i64 [[A]], i64 [[A]], i64 [[CONV6]])
-; CHECK-NEXT:    store i64 [[C]], i64* [[OUT:%.*]], align 8
+; CHECK-NEXT:    store i64 [[C]], ptr [[OUT:%.*]], align 8
 ; CHECK-NEXT:    [[D:%.*]] = extractelement <2 x i64> [[X]], i64 1
 ; CHECK-NEXT:    [[E:%.*]] = extractelement <4 x i32> [[Y]], i64 3
 ; CHECK-NEXT:    [[CONV17:%.*]] = zext i32 [[E]] to i64
 ; CHECK-NEXT:    [[F:%.*]] = tail call i64 @llvm.fshl.i64(i64 [[D]], i64 [[D]], i64 [[CONV17]])
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[OUT]], i32 1
-; CHECK-NEXT:    store i64 [[F]], i64* [[ARRAYIDX2]], align 8
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[OUT]], i32 1
+; CHECK-NEXT:    store i64 [[F]], ptr [[ARRAYIDX2]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %a = extractelement <2 x i64> %x, i32 0
   %b = extractelement <4 x i32> %y, i32 2
   %conv6 = zext i32 %b to i64
   %c = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %conv6)
-  store i64 %c, i64* %out
+  store i64 %c, ptr %out
   %d = extractelement <2 x i64> %x, i32 1
   %e = extractelement <4 x i32> %y, i32 3
   %conv17 = zext i32 %e to i64
   %f = tail call i64 @llvm.fshl.i64(i64 %d, i64 %d, i64 %conv17)
-  %arrayidx2 = getelementptr inbounds i64, i64* %out, i32 1
-  store i64 %f, i64* %arrayidx2
+  %arrayidx2 = getelementptr inbounds i64, ptr %out, i32 1
+  store i64 %f, ptr %arrayidx2
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR31847.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR31847.ll
index e62cd6065e913..38ad8041bdf8f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR31847.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR31847.ll
@@ -3,29 +3,29 @@
 target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
 
 @shift = common local_unnamed_addr global [10 x i32] zeroinitializer, align 4
-@data = common local_unnamed_addr global [10 x i8*] zeroinitializer, align 4
+@data = common local_unnamed_addr global [10 x ptr] zeroinitializer, align 4
 
 define void @flat(i32 %intensity) {
 ; CHECK-LABEL: @flat(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 0), align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 1), align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 0), align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 1), align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @shift, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([10 x i32], ptr @shift, i32 0, i32 1), align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr @data, align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr getelementptr inbounds ([10 x ptr], ptr @data, i32 0, i32 1), align 4
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 1, [[TMP0]]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 [[SHR]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 [[SHR]]
 ; CHECK-NEXT:    [[SHR1:%.*]] = lshr i32 1, [[TMP1]]
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i32 [[SHR1]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 [[SHR1]]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[D1_DATA_046:%.*]] = phi i8* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[ADD_PTR23_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[D1_DATA_046:%.*]] = phi ptr [ [[TMP3]], [[ENTRY:%.*]] ], [ [[ADD_PTR23_1:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[Y_045:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_1:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
 ; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP4]] to i32
 ; CHECK-NEXT:    [[SUB:%.*]] = add nsw i32 [[CONV]], -128
-; CHECK-NEXT:    [[TMP5:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
 ; CHECK-NEXT:    [[CONV3:%.*]] = zext i8 [[TMP5]] to i32
 ; CHECK-NEXT:    [[SUB4:%.*]] = add nsw i32 [[CONV3]], -128
 ; CHECK-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[SUB]], -1
@@ -36,22 +36,22 @@ define void @flat(i32 %intensity) {
 ; CHECK-NEXT:    [[COND14:%.*]] = select i1 [[CMP8]], i32 [[SUB4]], i32 [[SUB12]]
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[COND14]], [[COND]]
 ; CHECK-NEXT:    [[IDX_NEG:%.*]] = sub nsw i32 0, [[ADD]]
-; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[IDX_NEG]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i8, i8* [[ADD_PTR]], align 1
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[D1_DATA_046]], i32 [[IDX_NEG]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i8, ptr [[ADD_PTR]], align 1
 ; CHECK-NEXT:    [[CONV15:%.*]] = zext i8 [[TMP6]] to i32
 ; CHECK-NEXT:    [[ADD16:%.*]] = add nsw i32 [[CONV15]], [[INTENSITY:%.*]]
 ; CHECK-NEXT:    [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8
-; CHECK-NEXT:    store i8 [[CONV17]], i8* [[ADD_PTR]], align 1
-; CHECK-NEXT:    [[ADD_PTR18:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[ADD]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load i8, i8* [[ADD_PTR18]], align 1
+; CHECK-NEXT:    store i8 [[CONV17]], ptr [[ADD_PTR]], align 1
+; CHECK-NEXT:    [[ADD_PTR18:%.*]] = getelementptr inbounds i8, ptr [[D1_DATA_046]], i32 [[ADD]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i8, ptr [[ADD_PTR18]], align 1
 ; CHECK-NEXT:    [[NOT_TOBOOL:%.*]] = icmp eq i8 [[TMP7]], 0
 ; CHECK-NEXT:    [[CONV21:%.*]] = zext i1 [[NOT_TOBOOL]] to i8
-; CHECK-NEXT:    store i8 [[CONV21]], i8* [[ADD_PTR18]], align 1
-; CHECK-NEXT:    [[ADD_PTR23:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[TMP1]]
-; CHECK-NEXT:    [[TMP8:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
+; CHECK-NEXT:    store i8 [[CONV21]], ptr [[ADD_PTR18]], align 1
+; CHECK-NEXT:    [[ADD_PTR23:%.*]] = getelementptr inbounds i8, ptr [[D1_DATA_046]], i32 [[TMP1]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
 ; CHECK-NEXT:    [[CONV_1:%.*]] = zext i8 [[TMP8]] to i32
 ; CHECK-NEXT:    [[SUB_1:%.*]] = add nsw i32 [[CONV_1]], -128
-; CHECK-NEXT:    [[TMP9:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1
+; CHECK-NEXT:    [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
 ; CHECK-NEXT:    [[CONV3_1:%.*]] = zext i8 [[TMP9]] to i32
 ; CHECK-NEXT:    [[SUB4_1:%.*]] = add nsw i32 [[CONV3_1]], -128
 ; CHECK-NEXT:    [[CMP5_1:%.*]] = icmp sgt i32 [[SUB_1]], -1
@@ -62,43 +62,43 @@ define void @flat(i32 %intensity) {
 ; CHECK-NEXT:    [[COND14_1:%.*]] = select i1 [[CMP8_1]], i32 [[SUB4_1]], i32 [[SUB12_1]]
 ; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[COND14_1]], [[COND_1]]
 ; CHECK-NEXT:    [[IDX_NEG_1:%.*]] = sub nsw i32 0, [[ADD_1]]
-; CHECK-NEXT:    [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[IDX_NEG_1]]
-; CHECK-NEXT:    [[TMP10:%.*]] = load i8, i8* [[ADD_PTR_1]], align 1
+; CHECK-NEXT:    [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR23]], i32 [[IDX_NEG_1]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1
 ; CHECK-NEXT:    [[CONV15_1:%.*]] = zext i8 [[TMP10]] to i32
 ; CHECK-NEXT:    [[ADD16_1:%.*]] = add nsw i32 [[CONV15_1]], [[INTENSITY]]
 ; CHECK-NEXT:    [[CONV17_1:%.*]] = trunc i32 [[ADD16_1]] to i8
-; CHECK-NEXT:    store i8 [[CONV17_1]], i8* [[ADD_PTR_1]], align 1
-; CHECK-NEXT:    [[ADD_PTR18_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[ADD_1]]
-; CHECK-NEXT:    [[TMP11:%.*]] = load i8, i8* [[ADD_PTR18_1]], align 1
+; CHECK-NEXT:    store i8 [[CONV17_1]], ptr [[ADD_PTR_1]], align 1
+; CHECK-NEXT:    [[ADD_PTR18_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR23]], i32 [[ADD_1]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i8, ptr [[ADD_PTR18_1]], align 1
 ; CHECK-NEXT:    [[NOT_TOBOOL_1:%.*]] = icmp eq i8 [[TMP11]], 0
 ; CHECK-NEXT:    [[CONV21_1:%.*]] = zext i1 [[NOT_TOBOOL_1]] to i8
-; CHECK-NEXT:    store i8 [[CONV21_1]], i8* [[ADD_PTR18_1]], align 1
-; CHECK-NEXT:    [[ADD_PTR23_1]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[TMP1]]
+; CHECK-NEXT:    store i8 [[CONV21_1]], ptr [[ADD_PTR18_1]], align 1
+; CHECK-NEXT:    [[ADD_PTR23_1]] = getelementptr inbounds i8, ptr [[ADD_PTR23]], i32 [[TMP1]]
 ; CHECK-NEXT:    [[INC_1]] = add nsw i32 [[Y_045]], 2
 ; CHECK-NEXT:    [[EXITCOND_1:%.*]] = icmp eq i32 [[INC_1]], 128
 ; CHECK-NEXT:    br i1 [[EXITCOND_1]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
 ;
 entry:
-  %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 0), align 4
-  %1 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 1), align 4
-  %2 = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 0), align 4
-  %3 = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 1), align 4
+  %0 = load i32, ptr @shift, align 4
+  %1 = load i32, ptr getelementptr inbounds ([10 x i32], ptr @shift, i32 0, i32 1), align 4
+  %2 = load ptr, ptr @data, align 4
+  %3 = load ptr, ptr getelementptr inbounds ([10 x ptr], ptr @data, i32 0, i32 1), align 4
   %shr = lshr i32 1, %0
-  %arrayidx = getelementptr inbounds i8, i8* %2, i32 %shr
+  %arrayidx = getelementptr inbounds i8, ptr %2, i32 %shr
   %shr1 = lshr i32 1, %1
-  %arrayidx2 = getelementptr inbounds i8, i8* %3, i32 %shr1
+  %arrayidx2 = getelementptr inbounds i8, ptr %3, i32 %shr1
   br label %for.body
 
 for.cond.cleanup:                                 ; preds = %for.body
   ret void
 
 for.body:                                         ; preds = %for.body, %entry
-  %d1_data.046 = phi i8* [ %3, %entry ], [ %add.ptr23.1, %for.body ]
+  %d1_data.046 = phi ptr [ %3, %entry ], [ %add.ptr23.1, %for.body ]
   %y.045 = phi i32 [ 0, %entry ], [ %inc.1, %for.body ]
-  %4 = load i8, i8* %arrayidx, align 1
+  %4 = load i8, ptr %arrayidx, align 1
   %conv = zext i8 %4 to i32
   %sub = add nsw i32 %conv, -128
-  %5 = load i8, i8* %arrayidx2, align 1
+  %5 = load i8, ptr %arrayidx2, align 1
   %conv3 = zext i8 %5 to i32
   %sub4 = add nsw i32 %conv3, -128
   %cmp5 = icmp sgt i32 %sub, -1
@@ -109,22 +109,22 @@ for.body:                                         ; preds = %for.body, %entry
   %cond14 = select i1 %cmp8, i32 %sub4, i32 %sub12
   %add = add nsw i32 %cond14, %cond
   %idx.neg = sub nsw i32 0, %add
-  %add.ptr = getelementptr inbounds i8, i8* %d1_data.046, i32 %idx.neg
-  %6 = load i8, i8* %add.ptr, align 1
+  %add.ptr = getelementptr inbounds i8, ptr %d1_data.046, i32 %idx.neg
+  %6 = load i8, ptr %add.ptr, align 1
   %conv15 = zext i8 %6 to i32
   %add16 = add nsw i32 %conv15, %intensity
   %conv17 = trunc i32 %add16 to i8
-  store i8 %conv17, i8* %add.ptr, align 1
-  %add.ptr18 = getelementptr inbounds i8, i8* %d1_data.046, i32 %add
-  %7 = load i8, i8* %add.ptr18, align 1
+  store i8 %conv17, ptr %add.ptr, align 1
+  %add.ptr18 = getelementptr inbounds i8, ptr %d1_data.046, i32 %add
+  %7 = load i8, ptr %add.ptr18, align 1
   %not.tobool = icmp eq i8 %7, 0
   %conv21 = zext i1 %not.tobool to i8
-  store i8 %conv21, i8* %add.ptr18, align 1
-  %add.ptr23 = getelementptr inbounds i8, i8* %d1_data.046, i32 %1
-  %8 = load i8, i8* %arrayidx, align 1
+  store i8 %conv21, ptr %add.ptr18, align 1
+  %add.ptr23 = getelementptr inbounds i8, ptr %d1_data.046, i32 %1
+  %8 = load i8, ptr %arrayidx, align 1
   %conv.1 = zext i8 %8 to i32
   %sub.1 = add nsw i32 %conv.1, -128
-  %9 = load i8, i8* %arrayidx2, align 1
+  %9 = load i8, ptr %arrayidx2, align 1
   %conv3.1 = zext i8 %9 to i32
   %sub4.1 = add nsw i32 %conv3.1, -128
   %cmp5.1 = icmp sgt i32 %sub.1, -1
@@ -135,18 +135,18 @@ for.body:                                         ; preds = %for.body, %entry
   %cond14.1 = select i1 %cmp8.1, i32 %sub4.1, i32 %sub12.1
   %add.1 = add nsw i32 %cond14.1, %cond.1
   %idx.neg.1 = sub nsw i32 0, %add.1
-  %add.ptr.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %idx.neg.1
-  %10 = load i8, i8* %add.ptr.1, align 1
+  %add.ptr.1 = getelementptr inbounds i8, ptr %add.ptr23, i32 %idx.neg.1
+  %10 = load i8, ptr %add.ptr.1, align 1
   %conv15.1 = zext i8 %10 to i32
   %add16.1 = add nsw i32 %conv15.1, %intensity
   %conv17.1 = trunc i32 %add16.1 to i8
-  store i8 %conv17.1, i8* %add.ptr.1, align 1
-  %add.ptr18.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %add.1
-  %11 = load i8, i8* %add.ptr18.1, align 1
+  store i8 %conv17.1, ptr %add.ptr.1, align 1
+  %add.ptr18.1 = getelementptr inbounds i8, ptr %add.ptr23, i32 %add.1
+  %11 = load i8, ptr %add.ptr18.1, align 1
   %not.tobool.1 = icmp eq i8 %11, 0
   %conv21.1 = zext i1 %not.tobool.1 to i8
-  store i8 %conv21.1, i8* %add.ptr18.1, align 1
-  %add.ptr23.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %1
+  store i8 %conv21.1, ptr %add.ptr18.1, align 1
+  %add.ptr23.1 = getelementptr inbounds i8, ptr %add.ptr23, i32 %1
   %inc.1 = add nsw i32 %y.045, 2
   %exitcond.1 = icmp eq i32 %inc.1, 128
   br i1 %exitcond.1, label %for.cond.cleanup, label %for.body

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll
index dbfd2b23d5376..66bfe05683157 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll
@@ -1,85 +1,81 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes=slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 | FileCheck %s
 
-define void @i64_simplified(i64* noalias %st, i64* noalias %ld) {
+define void @i64_simplified(ptr noalias %st, ptr noalias %ld) {
 ; CHECK-LABEL: @i64_simplified(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[LD:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[LD:%.*]], align 8
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[ST:%.*]] to <4 x i64>*
-; CHECK-NEXT:    store <4 x i64> [[SHUFFLE]], <4 x i64>* [[TMP3]], align 8
+; CHECK-NEXT:    store <4 x i64> [[SHUFFLE]], ptr [[ST:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %arrayidx1 = getelementptr inbounds i64, i64* %ld, i64 1
+  %arrayidx1 = getelementptr inbounds i64, ptr %ld, i64 1
 
-  %t0 = load i64, i64* %ld, align 8
-  %t1 = load i64, i64* %arrayidx1, align 8
+  %t0 = load i64, ptr %ld, align 8
+  %t1 = load i64, ptr %arrayidx1, align 8
 
-  %arrayidx3 = getelementptr inbounds i64, i64* %st, i64 1
-  %arrayidx4 = getelementptr inbounds i64, i64* %st, i64 2
-  %arrayidx5 = getelementptr inbounds i64, i64* %st, i64 3
+  %arrayidx3 = getelementptr inbounds i64, ptr %st, i64 1
+  %arrayidx4 = getelementptr inbounds i64, ptr %st, i64 2
+  %arrayidx5 = getelementptr inbounds i64, ptr %st, i64 3
 
-  store i64 %t0, i64* %st, align 8
-  store i64 %t1, i64* %arrayidx3, align 8
-  store i64 %t0, i64* %arrayidx4, align 8
-  store i64 %t1, i64* %arrayidx5, align 8
+  store i64 %t0, ptr %st, align 8
+  store i64 %t1, ptr %arrayidx3, align 8
+  store i64 %t0, ptr %arrayidx4, align 8
+  store i64 %t1, ptr %arrayidx5, align 8
   ret void
 }
 
-define void @i64_simplifiedi_reversed(i64* noalias %st, i64* noalias %ld) {
+define void @i64_simplifiedi_reversed(ptr noalias %st, ptr noalias %ld) {
 ; CHECK-LABEL: @i64_simplifiedi_reversed(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[LD:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[LD:%.*]], align 8
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[ST:%.*]] to <4 x i64>*
-; CHECK-NEXT:    store <4 x i64> [[SHUFFLE]], <4 x i64>* [[TMP3]], align 8
+; CHECK-NEXT:    store <4 x i64> [[SHUFFLE]], ptr [[ST:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %arrayidx1 = getelementptr inbounds i64, i64* %ld, i64 1
+  %arrayidx1 = getelementptr inbounds i64, ptr %ld, i64 1
 
-  %t0 = load i64, i64* %ld, align 8
-  %t1 = load i64, i64* %arrayidx1, align 8
+  %t0 = load i64, ptr %ld, align 8
+  %t1 = load i64, ptr %arrayidx1, align 8
 
-  %arrayidx3 = getelementptr inbounds i64, i64* %st, i64 1
-  %arrayidx4 = getelementptr inbounds i64, i64* %st, i64 2
-  %arrayidx5 = getelementptr inbounds i64, i64* %st, i64 3
+  %arrayidx3 = getelementptr inbounds i64, ptr %st, i64 1
+  %arrayidx4 = getelementptr inbounds i64, ptr %st, i64 2
+  %arrayidx5 = getelementptr inbounds i64, ptr %st, i64 3
 
-  store i64 %t1, i64* %st, align 8
-  store i64 %t0, i64* %arrayidx3, align 8
-  store i64 %t1, i64* %arrayidx4, align 8
-  store i64 %t0, i64* %arrayidx5, align 8
+  store i64 %t1, ptr %st, align 8
+  store i64 %t0, ptr %arrayidx3, align 8
+  store i64 %t1, ptr %arrayidx4, align 8
+  store i64 %t0, ptr %arrayidx5, align 8
   ret void
 }
 
-define void @i64_simplifiedi_extract(i64* noalias %st, i64* noalias %ld) {
+define void @i64_simplifiedi_extract(ptr noalias %st, ptr noalias %ld) {
 ; CHECK-LABEL: @i64_simplifiedi_extract(
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[LD:%.*]], i64 1
-; CHECK-NEXT:    [[T0:%.*]] = load i64, i64* [[LD]], align 8
-; CHECK-NEXT:    [[T1:%.*]] = load i64, i64* [[ARRAYIDX1]], align 8
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[ST:%.*]], i64 1
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 2
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 3
-; CHECK-NEXT:    store i64 [[T0]], i64* [[ST]], align 8
-; CHECK-NEXT:    store i64 [[T0]], i64* [[ARRAYIDX3]], align 8
-; CHECK-NEXT:    store i64 [[T0]], i64* [[ARRAYIDX4]], align 8
-; CHECK-NEXT:    store i64 [[T1]], i64* [[ARRAYIDX5]], align 8
-; CHECK-NEXT:    store i64 [[T1]], i64* [[LD]], align 8
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[LD:%.*]], i64 1
+; CHECK-NEXT:    [[T0:%.*]] = load i64, ptr [[LD]], align 8
+; CHECK-NEXT:    [[T1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, ptr [[ST:%.*]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr [[ST]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, ptr [[ST]], i64 3
+; CHECK-NEXT:    store i64 [[T0]], ptr [[ST]], align 8
+; CHECK-NEXT:    store i64 [[T0]], ptr [[ARRAYIDX3]], align 8
+; CHECK-NEXT:    store i64 [[T0]], ptr [[ARRAYIDX4]], align 8
+; CHECK-NEXT:    store i64 [[T1]], ptr [[ARRAYIDX5]], align 8
+; CHECK-NEXT:    store i64 [[T1]], ptr [[LD]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %arrayidx1 = getelementptr inbounds i64, i64* %ld, i64 1
+  %arrayidx1 = getelementptr inbounds i64, ptr %ld, i64 1
 
-  %t0 = load i64, i64* %ld, align 8
-  %t1 = load i64, i64* %arrayidx1, align 8
+  %t0 = load i64, ptr %ld, align 8
+  %t1 = load i64, ptr %arrayidx1, align 8
 
-  %arrayidx3 = getelementptr inbounds i64, i64* %st, i64 1
-  %arrayidx4 = getelementptr inbounds i64, i64* %st, i64 2
-  %arrayidx5 = getelementptr inbounds i64, i64* %st, i64 3
+  %arrayidx3 = getelementptr inbounds i64, ptr %st, i64 1
+  %arrayidx4 = getelementptr inbounds i64, ptr %st, i64 2
+  %arrayidx5 = getelementptr inbounds i64, ptr %st, i64 3
 
-  store i64 %t0, i64* %st, align 8
-  store i64 %t0, i64* %arrayidx3, align 8
-  store i64 %t0, i64* %arrayidx4, align 8
-  store i64 %t1, i64* %arrayidx5, align 8
-  store i64 %t1, i64* %ld, align 8
+  store i64 %t0, ptr %st, align 8
+  store i64 %t0, ptr %arrayidx3, align 8
+  store i64 %t0, ptr %arrayidx4, align 8
+  store i64 %t1, ptr %arrayidx5, align 8
+  store i64 %t1, ptr %ld, align 8
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR34635.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR34635.ll
index ee96df9d6e85d..b44b11a94dfaa 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR34635.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR34635.ll
@@ -5,93 +5,85 @@ define i32 @main() {
 ; CHECK-LABEL: @main(
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[T:%.*]] = alloca <8 x i32>, align 32
-; CHECK-NEXT:    [[T1:%.*]] = bitcast <8 x i32>* [[T]] to [8 x i32]*
 ; CHECK-NEXT:    [[T2:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[T3:%.*]] = bitcast <8 x i32>* [[T]] to i8*
-; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds <8 x i32>, <8 x i32>* [[T]], i64 0, i64 0
-; CHECK-NEXT:    [[T5:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[T1]], i64 0, i64 1
-; CHECK-NEXT:    [[T6:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[T1]], i64 0, i64 2
-; CHECK-NEXT:    [[T7:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[T1]], i64 0, i64 3
-; CHECK-NEXT:    [[T8:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[T1]], i64 0, i64 4
-; CHECK-NEXT:    [[T9:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[T1]], i64 0, i64 6
-; CHECK-NEXT:    [[T10:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[T1]], i64 0, i64 5
-; CHECK-NEXT:    [[T11:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[T1]], i64 0, i64 7
-; CHECK-NEXT:    store <8 x i32> <i32 -221320154, i32 -756426931, i32 563883532, i32 382683935, i32 144890241, i32 -1052877364, i32 -1052877364, i32 -1016007675>, <8 x i32>* [[T]], align 32
-; CHECK-NEXT:    [[T12:%.*]] = bitcast i32* [[T2]] to i8*
-; CHECK-NEXT:    [[T13:%.*]] = load i32, i32* [[T4]], align 32
-; CHECK-NEXT:    [[T14:%.*]] = load i32, i32* [[T5]], align 4
+; CHECK-NEXT:    [[T5:%.*]] = getelementptr inbounds [8 x i32], ptr [[T]], i64 0, i64 1
+; CHECK-NEXT:    [[T6:%.*]] = getelementptr inbounds [8 x i32], ptr [[T]], i64 0, i64 2
+; CHECK-NEXT:    [[T7:%.*]] = getelementptr inbounds [8 x i32], ptr [[T]], i64 0, i64 3
+; CHECK-NEXT:    [[T8:%.*]] = getelementptr inbounds [8 x i32], ptr [[T]], i64 0, i64 4
+; CHECK-NEXT:    [[T9:%.*]] = getelementptr inbounds [8 x i32], ptr [[T]], i64 0, i64 6
+; CHECK-NEXT:    [[T10:%.*]] = getelementptr inbounds [8 x i32], ptr [[T]], i64 0, i64 5
+; CHECK-NEXT:    [[T11:%.*]] = getelementptr inbounds [8 x i32], ptr [[T]], i64 0, i64 7
+; CHECK-NEXT:    store <8 x i32> <i32 -221320154, i32 -756426931, i32 563883532, i32 382683935, i32 144890241, i32 -1052877364, i32 -1052877364, i32 -1016007675>, ptr [[T]], align 32
+; CHECK-NEXT:    [[T13:%.*]] = load i32, ptr [[T]], align 32
+; CHECK-NEXT:    [[T14:%.*]] = load i32, ptr [[T5]], align 4
 ; CHECK-NEXT:    [[T15:%.*]] = icmp slt i32 [[T14]], [[T13]]
 ; CHECK-NEXT:    [[T16:%.*]] = select i1 [[T15]], i32 [[T14]], i32 [[T13]]
 ; CHECK-NEXT:    [[T17:%.*]] = zext i1 [[T15]] to i32
-; CHECK-NEXT:    [[T18:%.*]] = load i32, i32* [[T6]], align 8
+; CHECK-NEXT:    [[T18:%.*]] = load i32, ptr [[T6]], align 8
 ; CHECK-NEXT:    [[T19:%.*]] = icmp slt i32 [[T18]], [[T16]]
 ; CHECK-NEXT:    [[T20:%.*]] = select i1 [[T19]], i32 [[T18]], i32 [[T16]]
 ; CHECK-NEXT:    [[T21:%.*]] = select i1 [[T19]], i32 2, i32 [[T16]]
-; CHECK-NEXT:    [[T22:%.*]] = load i32, i32* [[T7]], align 4
+; CHECK-NEXT:    [[T22:%.*]] = load i32, ptr [[T7]], align 4
 ; CHECK-NEXT:    [[T23:%.*]] = icmp slt i32 [[T22]], [[T20]]
 ; CHECK-NEXT:    [[T24:%.*]] = select i1 [[T23]], i32 [[T22]], i32 [[T20]]
 ; CHECK-NEXT:    [[T25:%.*]] = select i1 [[T23]], i32 3, i32 [[T21]]
-; CHECK-NEXT:    [[T26:%.*]] = load i32, i32* [[T8]], align 16
+; CHECK-NEXT:    [[T26:%.*]] = load i32, ptr [[T8]], align 16
 ; CHECK-NEXT:    [[T27:%.*]] = icmp slt i32 [[T26]], [[T24]]
 ; CHECK-NEXT:    [[T28:%.*]] = select i1 [[T27]], i32 [[T26]], i32 [[T24]]
 ; CHECK-NEXT:    [[T29:%.*]] = select i1 [[T27]], i32 4, i32 [[T25]]
-; CHECK-NEXT:    [[T30:%.*]] = load i32, i32* [[T10]], align 4
+; CHECK-NEXT:    [[T30:%.*]] = load i32, ptr [[T10]], align 4
 ; CHECK-NEXT:    [[T31:%.*]] = icmp slt i32 [[T30]], [[T28]]
 ; CHECK-NEXT:    [[T32:%.*]] = select i1 [[T31]], i32 [[T30]], i32 [[T28]]
 ; CHECK-NEXT:    [[T33:%.*]] = select i1 [[T31]], i32 5, i32 [[T29]]
-; CHECK-NEXT:    [[T34:%.*]] = load i32, i32* [[T9]], align 8
+; CHECK-NEXT:    [[T34:%.*]] = load i32, ptr [[T9]], align 8
 ; CHECK-NEXT:    [[T35:%.*]] = icmp slt i32 [[T34]], [[T32]]
 ; CHECK-NEXT:    [[T36:%.*]] = select i1 [[T35]], i32 [[T34]], i32 [[T32]]
 ; CHECK-NEXT:    [[T37:%.*]] = select i1 [[T35]], i32 6, i32 [[T33]]
-; CHECK-NEXT:    [[T38:%.*]] = load i32, i32* [[T11]], align 4
+; CHECK-NEXT:    [[T38:%.*]] = load i32, ptr [[T11]], align 4
 ; CHECK-NEXT:    [[T39:%.*]] = icmp slt i32 [[T38]], [[T36]]
 ; CHECK-NEXT:    [[T40:%.*]] = select i1 [[T39]], i32 7, i32 [[T37]]
-; CHECK-NEXT:    store i32 [[T40]], i32* [[T2]], align 4
+; CHECK-NEXT:    store i32 [[T40]], ptr [[T2]], align 4
 ; CHECK-NEXT:    ret i32 0
 ;
 bb:
   %t = alloca <8 x i32>, align 32
-  %t1 = bitcast <8 x i32>* %t to [8 x i32]*
   %t2 = alloca i32, align 4
-  %t3 = bitcast <8 x i32>* %t to i8*
-  %t4 = getelementptr inbounds <8 x i32>, <8 x i32>* %t, i64 0, i64 0
-  %t5 = getelementptr inbounds [8 x i32], [8 x i32]* %t1, i64 0, i64 1
-  %t6 = getelementptr inbounds [8 x i32], [8 x i32]* %t1, i64 0, i64 2
-  %t7 = getelementptr inbounds [8 x i32], [8 x i32]* %t1, i64 0, i64 3
-  %t8 = getelementptr inbounds [8 x i32], [8 x i32]* %t1, i64 0, i64 4
-  %t9 = getelementptr inbounds [8 x i32], [8 x i32]* %t1, i64 0, i64 6
-  %t10 = getelementptr inbounds [8 x i32], [8 x i32]* %t1, i64 0, i64 5
-  %t11 = getelementptr inbounds [8 x i32], [8 x i32]* %t1, i64 0, i64 7
-  store <8 x i32> <i32 -221320154, i32 -756426931, i32 563883532, i32 382683935, i32 144890241, i32 -1052877364, i32 -1052877364, i32 -1016007675>, <8 x i32>* %t, align 32
-  %t12 = bitcast i32* %t2 to i8*
-  %t13 = load i32, i32* %t4, align 32
-  %t14 = load i32, i32* %t5, align 4
+  %t5 = getelementptr inbounds [8 x i32], ptr %t, i64 0, i64 1
+  %t6 = getelementptr inbounds [8 x i32], ptr %t, i64 0, i64 2
+  %t7 = getelementptr inbounds [8 x i32], ptr %t, i64 0, i64 3
+  %t8 = getelementptr inbounds [8 x i32], ptr %t, i64 0, i64 4
+  %t9 = getelementptr inbounds [8 x i32], ptr %t, i64 0, i64 6
+  %t10 = getelementptr inbounds [8 x i32], ptr %t, i64 0, i64 5
+  %t11 = getelementptr inbounds [8 x i32], ptr %t, i64 0, i64 7
+  store <8 x i32> <i32 -221320154, i32 -756426931, i32 563883532, i32 382683935, i32 144890241, i32 -1052877364, i32 -1052877364, i32 -1016007675>, ptr %t, align 32
+  %t13 = load i32, ptr %t, align 32
+  %t14 = load i32, ptr %t5, align 4
   %t15 = icmp slt i32 %t14, %t13
   %t16 = select i1 %t15, i32 %t14, i32 %t13
   %t17 = zext i1 %t15 to i32
-  %t18 = load i32, i32* %t6, align 8
+  %t18 = load i32, ptr %t6, align 8
   %t19 = icmp slt i32 %t18, %t16
   %t20 = select i1 %t19, i32 %t18, i32 %t16
   %t21 = select i1 %t19, i32 2, i32 %t16
-  %t22 = load i32, i32* %t7, align 4
+  %t22 = load i32, ptr %t7, align 4
   %t23 = icmp slt i32 %t22, %t20
   %t24 = select i1 %t23, i32 %t22, i32 %t20
   %t25 = select i1 %t23, i32 3, i32 %t21
-  %t26 = load i32, i32* %t8, align 16
+  %t26 = load i32, ptr %t8, align 16
   %t27 = icmp slt i32 %t26, %t24
   %t28 = select i1 %t27, i32 %t26, i32 %t24
   %t29 = select i1 %t27, i32 4, i32 %t25
-  %t30 = load i32, i32* %t10, align 4
+  %t30 = load i32, ptr %t10, align 4
   %t31 = icmp slt i32 %t30, %t28
   %t32 = select i1 %t31, i32 %t30, i32 %t28
   %t33 = select i1 %t31, i32 5, i32 %t29
-  %t34 = load i32, i32* %t9, align 8
+  %t34 = load i32, ptr %t9, align 8
   %t35 = icmp slt i32 %t34, %t32
   %t36 = select i1 %t35, i32 %t34, i32 %t32
   %t37 = select i1 %t35, i32 6, i32 %t33
-  %t38 = load i32, i32* %t11, align 4
+  %t38 = load i32, ptr %t11, align 4
   %t39 = icmp slt i32 %t38, %t36
   %t40 = select i1 %t39, i32 7, i32 %t37
-  store i32 %t40, i32* %t2, align 4
+  store i32 %t40, ptr %t2, align 4
   ret i32 0
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll
index 3fc7185588305..5ec7aac8a7935 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll
@@ -2,15 +2,14 @@
 ; RUN: opt -passes=slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
 
-define void @mainTest(i32* %ptr) #0  {
+define void @mainTest(ptr %ptr) #0  {
 ; CHECK-LABEL: @mainTest(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32* [[PTR:%.*]], null
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[PTR:%.*]], null
 ; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP:%.*]], label [[BAIL_OUT:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[DUMMY_PHI:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[OP_RDX3:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
@@ -26,27 +25,27 @@ define void @mainTest(i32* %ptr) #0  {
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %cmp = icmp eq i32* %ptr, null
+  %cmp = icmp eq ptr %ptr, null
   br i1 %cmp, label %loop, label %bail_out
 
 loop:
   %dummy_phi = phi i32 [ 1, %entry ], [ %18, %loop ]
-  %0 = load i32, i32 * %ptr , align 4
+  %0 = load i32, ptr %ptr , align 4
   %1 = mul i32 %0, %0
   %2 = add i32 1, %1
-  %3 = getelementptr inbounds i32, i32 * %ptr, i64 1
-  %4 = load i32, i32 * %3 , align 4
+  %3 = getelementptr inbounds i32, ptr %ptr, i64 1
+  %4 = load i32, ptr %3 , align 4
   %5 = mul i32 %4, %4
   %6 = add i32 %2, %4
   %7 = add i32 %6, %5
-  %8 = getelementptr inbounds i32, i32 *%ptr, i64 2
-  %9 = load i32, i32 * %8 , align 4
+  %8 = getelementptr inbounds i32, ptr %ptr, i64 2
+  %9 = load i32, ptr %8 , align 4
   %10 = mul i32 %9, %9
   %11 = add i32 %7, %9
   %12 = add i32 %11, %10
   %13 = sext i32 %9 to i64
-  %14 = getelementptr inbounds i32, i32 *%ptr, i64 3
-  %15 = load i32, i32 * %14 , align 4
+  %14 = getelementptr inbounds i32, ptr %ptr, i64 3
+  %15 = load i32, ptr %14 , align 4
   %16 = mul i32 %15, %15
   %17 = add i32 %12, %15
   %18 = add i32 %17, %16

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll
index 356d350ff9948..4132fbaa5c755 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll
@@ -6,13 +6,13 @@
 define { i64, i64 } @patatino(double %arg) {
 ; CHECK-LABEL: @patatino(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x double>, <2 x double>* bitcast ([6 x double]* @global to <2 x double>*), align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2) to <2 x double>*), align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x double>, ptr @global, align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr getelementptr inbounds ([6 x double], ptr @global, i64 0, i64 2), align 16
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[ARG:%.*]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP0]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4) to <2 x double>*), align 16
+; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([6 x double], ptr @global, i64 0, i64 4), align 16
 ; CHECK-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP5]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = fptosi <2 x double> [[TMP6]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP8:%.*]] = sext <2 x i32> [[TMP7]] to <2 x i64>
@@ -23,19 +23,19 @@ define { i64, i64 } @patatino(double %arg) {
 ; CHECK-NEXT:    ret { i64, i64 } [[T17]]
 ;
 bb:
-  %t = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 0), align 16
-  %t1 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2), align 16
+  %t = load double, ptr @global, align 16
+  %t1 = load double, ptr getelementptr inbounds ([6 x double], ptr @global, i64 0, i64 2), align 16
   %t2 = fmul double %t1, %arg
   %t3 = fadd double %t, %t2
-  %t4 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4), align 16
+  %t4 = load double, ptr getelementptr inbounds ([6 x double], ptr @global, i64 0, i64 4), align 16
   %t5 = fadd double %t4, %t3
   %t6 = fptosi double %t5 to i32
   %t7 = sext i32 %t6 to i64
-  %t8 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 1), align 8
-  %t9 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 3), align 8
+  %t8 = load double, ptr getelementptr inbounds ([6 x double], ptr @global, i64 0, i64 1), align 8
+  %t9 = load double, ptr getelementptr inbounds ([6 x double], ptr @global, i64 0, i64 3), align 8
   %t10 = fmul double %t9, %arg
   %t11 = fadd double %t8, %t10
-  %t12 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 5), align 8
+  %t12 = load double, ptr getelementptr inbounds ([6 x double], ptr @global, i64 0, i64 5), align 8
   %t13 = fadd double %t12, %t11
   %t14 = fptosi double %t13 to i32
   %t15 = sext i32 %t14 to i64

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR36280.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR36280.ll
index ed929791ed7a8..2142cd5668af8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR36280.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR36280.ll
@@ -1,22 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 | FileCheck %s
 
-define float @jacobi(float* %p, float %x, float %y, float %z) {
+define float @jacobi(ptr %p, float %x, float %y, float %z) {
 ; CHECK-LABEL: @jacobi(
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr float, float* [[P:%.*]], i64 1
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr float, float* [[P]], i64 2
-; CHECK-NEXT:    [[P1:%.*]] = load float, float* [[GEP1]], align 4
-; CHECK-NEXT:    [[P2:%.*]] = load float, float* [[GEP2]], align 4
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr float, ptr [[P:%.*]], i64 1
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr float, ptr [[P]], i64 2
+; CHECK-NEXT:    [[P1:%.*]] = load float, ptr [[GEP1]], align 4
+; CHECK-NEXT:    [[P2:%.*]] = load float, ptr [[GEP2]], align 4
 ; CHECK-NEXT:    [[MUL1:%.*]] = fmul float [[P1]], [[X:%.*]]
 ; CHECK-NEXT:    [[MUL2:%.*]] = fmul float [[P2]], [[Y:%.*]]
 ; CHECK-NEXT:    [[ADD1:%.*]] = fadd float [[MUL1]], [[Z:%.*]]
 ; CHECK-NEXT:    [[ADD2:%.*]] = fadd float [[MUL2]], [[ADD1]]
 ; CHECK-NEXT:    ret float [[ADD2]]
 ;
-  %gep1 = getelementptr float, float* %p, i64 1
-  %gep2 = getelementptr float, float* %p, i64 2
-  %p1 = load float, float* %gep1
-  %p2 = load float, float* %gep2
+  %gep1 = getelementptr float, ptr %p, i64 1
+  %gep2 = getelementptr float, ptr %p, i64 2
+  %p1 = load float, ptr %gep1
+  %p2 = load float, ptr %gep2
   %mul1 = fmul float %p1, %x
   %mul2 = fmul float %p2, %y
   %add1 = fadd float %mul1, %z

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll
index bfffc1334879d..2ea7f191947b4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck %s
 
-define void @mainTest(i32 %param, i32 * %vals, i32 %len) {
+define void @mainTest(i32 %param, ptr %vals, i32 %len) {
 ; CHECK-LABEL: @mainTest(
 ; CHECK-NEXT:  bci_15.preheader:
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 31>, i32 [[PARAM:%.*]], i32 0
@@ -12,7 +12,7 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = add <16 x i32> [[SHUFFLE]], <i32 -1, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 0
-; CHECK-NEXT:    store atomic i32 [[TMP4]], i32* [[VALS:%.*]] unordered, align 4
+; CHECK-NEXT:    store atomic i32 [[TMP4]], ptr [[VALS:%.*]] unordered, align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[TMP3]])
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = and i32 [[TMP5]], [[TMP2]]
 ; CHECK-NEXT:    [[V44:%.*]] = add i32 [[TMP2]], 16
@@ -29,7 +29,7 @@ bci_15:                                       ; preds = %bci_15.preheader, %bci_
   %local_0_ = phi i32 [ %v43, %bci_15 ], [ %param, %bci_15.preheader ]
   %local_4_ = phi i32 [ %v44, %bci_15 ], [ 31, %bci_15.preheader ]
   %v12 = add i32 %local_0_, -1
-  store atomic i32 %local_0_, i32 * %vals unordered, align 4
+  store atomic i32 %local_0_, ptr %vals unordered, align 4
   %v13 = add i32 %local_4_, 1
   %v14 = and i32 %local_4_, %v12
   %v15 = add i32 %local_4_, 2

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll b/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll
index f726a1ab5e524..99f0b4e9f6244 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll
@@ -17,51 +17,51 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @addsub() #0 {
 ; CHECK-LABEL: @addsub(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @b to <4 x i32>*), align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @c to <4 x i32>*), align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr @b, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @c, align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[TMP0]], [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @d to <4 x i32>*), align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @e to <4 x i32>*), align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr @d, align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr @e, align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[TMP2]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub nsw <4 x i32> [[TMP2]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP8]], ptr @a, align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 0), align 4
-  %1 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 0), align 4
+  %0 = load i32, ptr @b, align 4
+  %1 = load i32, ptr @c, align 4
   %add = add nsw i32 %0, %1
-  %2 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 0), align 4
-  %3 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 0), align 4
+  %2 = load i32, ptr @d, align 4
+  %3 = load i32, ptr @e, align 4
   %add1 = add nsw i32 %2, %3
   %add2 = add nsw i32 %add, %add1
-  store i32 %add2, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 0), align 4
-  %4 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 1), align 4
-  %5 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 1), align 4
+  store i32 %add2, ptr @a, align 4
+  %4 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 1), align 4
+  %5 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 1), align 4
   %add3 = add nsw i32 %4, %5
-  %6 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 1), align 4
-  %7 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 1), align 4
+  %6 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 1), align 4
+  %7 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 1), align 4
   %add4 = add nsw i32 %6, %7
   %sub = sub nsw i32 %add3, %add4
-  store i32 %sub, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 1), align 4
-  %8 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 2), align 4
-  %9 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 2), align 4
+  store i32 %sub, ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 1), align 4
+  %8 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 2), align 4
+  %9 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 2), align 4
   %add5 = add nsw i32 %8, %9
-  %10 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 2), align 4
-  %11 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 2), align 4
+  %10 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 2), align 4
+  %11 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 2), align 4
   %add6 = add nsw i32 %10, %11
   %add7 = add nsw i32 %add5, %add6
-  store i32 %add7, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 2), align 4
-  %12 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 3), align 4
-  %13 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 3), align 4
+  store i32 %add7, ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 2), align 4
+  %12 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 3), align 4
+  %13 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 3), align 4
   %add8 = add nsw i32 %12, %13
-  %14 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 3), align 4
-  %15 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 3), align 4
+  %14 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 3), align 4
+  %15 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 3), align 4
   %add9 = add nsw i32 %14, %15
   %sub10 = sub nsw i32 %add8, %add9
-  store i32 %sub10, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 3), align 4
+  store i32 %sub10, ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 3), align 4
   ret void
 }
 
@@ -69,51 +69,51 @@ entry:
 define void @subadd() #0 {
 ; CHECK-LABEL: @subadd(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @b to <4 x i32>*), align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @c to <4 x i32>*), align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr @b, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @c, align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[TMP0]], [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @d to <4 x i32>*), align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @e to <4 x i32>*), align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr @d, align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr @e, align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw <4 x i32> [[TMP2]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[TMP2]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP8]], ptr @a, align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 0), align 4
-  %1 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 0), align 4
+  %0 = load i32, ptr @b, align 4
+  %1 = load i32, ptr @c, align 4
   %add = add nsw i32 %0, %1
-  %2 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 0), align 4
-  %3 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 0), align 4
+  %2 = load i32, ptr @d, align 4
+  %3 = load i32, ptr @e, align 4
   %add1 = add nsw i32 %2, %3
   %sub = sub nsw i32 %add, %add1
-  store i32 %sub, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 0), align 4
-  %4 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 1), align 4
-  %5 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 1), align 4
+  store i32 %sub, ptr @a, align 4
+  %4 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 1), align 4
+  %5 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 1), align 4
   %add2 = add nsw i32 %4, %5
-  %6 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 1), align 4
-  %7 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 1), align 4
+  %6 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 1), align 4
+  %7 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 1), align 4
   %add3 = add nsw i32 %6, %7
   %add4 = add nsw i32 %add2, %add3
-  store i32 %add4, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 1), align 4
-  %8 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 2), align 4
-  %9 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 2), align 4
+  store i32 %add4, ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 1), align 4
+  %8 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 2), align 4
+  %9 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 2), align 4
   %add5 = add nsw i32 %8, %9
-  %10 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 2), align 4
-  %11 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 2), align 4
+  %10 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 2), align 4
+  %11 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 2), align 4
   %add6 = add nsw i32 %10, %11
   %sub7 = sub nsw i32 %add5, %add6
-  store i32 %sub7, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 2), align 4
-  %12 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 3), align 4
-  %13 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 3), align 4
+  store i32 %sub7, ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 2), align 4
+  %12 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 3), align 4
+  %13 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 3), align 4
   %add8 = add nsw i32 %12, %13
-  %14 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 3), align 4
-  %15 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 3), align 4
+  %14 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 3), align 4
+  %15 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 3), align 4
   %add9 = add nsw i32 %14, %15
   %add10 = add nsw i32 %add8, %add9
-  store i32 %add10, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 3), align 4
+  store i32 %add10, ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 3), align 4
   ret void
 }
 
@@ -121,31 +121,31 @@ entry:
 define void @faddfsub() #0 {
 ; CHECK-LABEL: @faddfsub(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([4 x float]* @fb to <4 x float>*), align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([4 x float]* @fc to <4 x float>*), align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr @fb, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @fc, align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = fsub <4 x float> [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast ([4 x float]* @fa to <4 x float>*), align 4
+; CHECK-NEXT:    store <4 x float> [[TMP4]], ptr @fa, align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
-  %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
+  %0 = load float, ptr @fb, align 4
+  %1 = load float, ptr @fc, align 4
   %add = fadd float %0, %1
-  store float %add, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
-  %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
-  %3 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
+  store float %add, ptr @fa, align 4
+  %2 = load float, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 1), align 4
+  %3 = load float, ptr getelementptr inbounds ([4 x float], ptr @fc, i32 0, i64 1), align 4
   %sub = fsub float %2, %3
-  store float %sub, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
-  %4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
-  %5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
+  store float %sub, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 1), align 4
+  %4 = load float, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 2), align 4
+  %5 = load float, ptr getelementptr inbounds ([4 x float], ptr @fc, i32 0, i64 2), align 4
   %add1 = fadd float %4, %5
-  store float %add1, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
-  %6 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
-  %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
+  store float %add1, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 2), align 4
+  %6 = load float, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 3), align 4
+  %7 = load float, ptr getelementptr inbounds ([4 x float], ptr @fc, i32 0, i64 3), align 4
   %sub2 = fsub float %6, %7
-  store float %sub2, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
+  store float %sub2, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 3), align 4
   ret void
 }
 
@@ -153,31 +153,31 @@ entry:
 define void @fsubfadd() #0 {
 ; CHECK-LABEL: @fsubfadd(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([4 x float]* @fb to <4 x float>*), align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([4 x float]* @fc to <4 x float>*), align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr @fb, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @fc, align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fsub <4 x float> [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast ([4 x float]* @fa to <4 x float>*), align 4
+; CHECK-NEXT:    store <4 x float> [[TMP4]], ptr @fa, align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
-  %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
+  %0 = load float, ptr @fb, align 4
+  %1 = load float, ptr @fc, align 4
   %sub = fsub float %0, %1
-  store float %sub, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
-  %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
-  %3 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
+  store float %sub, ptr @fa, align 4
+  %2 = load float, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 1), align 4
+  %3 = load float, ptr getelementptr inbounds ([4 x float], ptr @fc, i32 0, i64 1), align 4
   %add = fadd float %2, %3
-  store float %add, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
-  %4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
-  %5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
+  store float %add, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 1), align 4
+  %4 = load float, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 2), align 4
+  %5 = load float, ptr getelementptr inbounds ([4 x float], ptr @fc, i32 0, i64 2), align 4
   %sub1 = fsub float %4, %5
-  store float %sub1, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
-  %6 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
-  %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
+  store float %sub1, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 2), align 4
+  %6 = load float, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 3), align 4
+  %7 = load float, ptr getelementptr inbounds ([4 x float], ptr @fc, i32 0, i64 3), align 4
   %add2 = fadd float %6, %7
-  store float %add2, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
+  store float %add2, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 3), align 4
   ret void
 }
 
@@ -185,31 +185,31 @@ entry:
 define void @faddfsub_select() #0 {
 ; CHECK-LABEL: @faddfsub_select(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([4 x float]* @fb to <4 x float>*), align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([4 x float]* @fc to <4 x float>*), align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr @fb, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @fc, align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = fsub <4 x float> [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast ([4 x float]* @fa to <4 x float>*), align 4
+; CHECK-NEXT:    store <4 x float> [[TMP4]], ptr @fa, align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
-  %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
+  %0 = load float, ptr @fb, align 4
+  %1 = load float, ptr @fc, align 4
   %add = fadd float %0, %1
-  store float %add, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
-  %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
-  %3 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
+  store float %add, ptr @fa, align 4
+  %2 = load float, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 1), align 4
+  %3 = load float, ptr getelementptr inbounds ([4 x float], ptr @fc, i32 0, i64 1), align 4
   %add1 = fadd float %2, %3
-  store float %add1, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
-  %4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
-  %5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
+  store float %add1, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 1), align 4
+  %4 = load float, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 2), align 4
+  %5 = load float, ptr getelementptr inbounds ([4 x float], ptr @fc, i32 0, i64 2), align 4
   %add2 = fadd float %4, %5
-  store float %add2, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
-  %6 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
-  %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
+  store float %add2, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 2), align 4
+  %6 = load float, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 3), align 4
+  %7 = load float, ptr getelementptr inbounds ([4 x float], ptr @fc, i32 0, i64 3), align 4
   %sub = fsub float %6, %7
-  store float %sub, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
+  store float %sub, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 3), align 4
   ret void
 }
 
@@ -221,30 +221,30 @@ entry:
 
 define void @reorder_alt() #0 {
 ; CHECK-LABEL: @reorder_alt(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([4 x float]* @fa to <4 x float>*), align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([4 x float]* @fb to <4 x float>*), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @fa, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @fb, align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    store <4 x float> [[TMP5]], <4 x float>* bitcast ([4 x float]* @fc to <4 x float>*), align 4
+; CHECK-NEXT:    store <4 x float> [[TMP5]], ptr @fc, align 4
 ; CHECK-NEXT:    ret void
 ;
-  %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
-  %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
+  %1 = load float, ptr @fb, align 4
+  %2 = load float, ptr @fa, align 4
   %3 = fadd float %1, %2
-  store float %3, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
-  %4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
-  %5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
+  store float %3, ptr @fc, align 4
+  %4 = load float, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 1), align 4
+  %5 = load float, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 1), align 4
   %6 = fsub float %4, %5
-  store float %6, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
-  %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
-  %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
+  store float %6, ptr getelementptr inbounds ([4 x float], ptr @fc, i32 0, i64 1), align 4
+  %7 = load float, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 2), align 4
+  %8 = load float, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 2), align 4
   %9 = fadd float %7, %8
-  store float %9, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
-  %10 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
-  %11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
+  store float %9, ptr getelementptr inbounds ([4 x float], ptr @fc, i32 0, i64 2), align 4
+  %10 = load float, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 3), align 4
+  %11 = load float, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 3), align 4
   %12 = fsub float %10, %11
-  store float %12, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
+  store float %12, ptr getelementptr inbounds ([4 x float], ptr @fc, i32 0, i64 3), align 4
   ret void
 }
 
@@ -256,42 +256,42 @@ define void @reorder_alt() #0 {
 
 define void @reorder_alt_subTree() #0 {
 ; CHECK-LABEL: @reorder_alt_subTree(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([4 x float]* @fa to <4 x float>*), align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([4 x float]* @fd to <4 x float>*), align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast ([4 x float]* @fb to <4 x float>*), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @fa, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @fd, align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr @fb, align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = fsub <4 x float> [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = fadd <4 x float> [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
 ; CHECK-NEXT:    [[TMP7:%.*]] = fadd <4 x float> [[TMP1]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = fsub <4 x float> [[TMP1]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    store <4 x float> [[TMP9]], <4 x float>* bitcast ([4 x float]* @fc to <4 x float>*), align 4
+; CHECK-NEXT:    store <4 x float> [[TMP9]], ptr @fc, align 4
 ; CHECK-NEXT:    ret void
 ;
-  %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
-  %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
-  %3 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fd, i32 0, i64 0), align 4
+  %1 = load float, ptr @fa, align 4
+  %2 = load float, ptr @fb, align 4
+  %3 = load float, ptr @fd, align 4
   %4 = fsub float %2, %3
   %5 = fadd float %1, %4
-  store float %5, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
-  %6 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
-  %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
-  %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fd, i32 0, i64 1), align 4
+  store float %5, ptr @fc, align 4
+  %6 = load float, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 1), align 4
+  %7 = load float, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 1), align 4
+  %8 = load float, ptr getelementptr inbounds ([4 x float], ptr @fd, i32 0, i64 1), align 4
   %9 = fadd float %7, %8
   %10 = fsub float %6, %9
-  store float %10, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
-  %11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
-  %12 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
-  %13 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fd, i32 0, i64 2), align 4
+  store float %10, ptr getelementptr inbounds ([4 x float], ptr @fc, i32 0, i64 1), align 4
+  %11 = load float, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 2), align 4
+  %12 = load float, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 2), align 4
+  %13 = load float, ptr getelementptr inbounds ([4 x float], ptr @fd, i32 0, i64 2), align 4
   %14 = fsub float %12, %13
   %15 = fadd float %11, %14
-  store float %15, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
-  %16 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
-  %17 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fd, i32 0, i64 3), align 4
-  %18 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
+  store float %15, ptr getelementptr inbounds ([4 x float], ptr @fc, i32 0, i64 2), align 4
+  %16 = load float, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 3), align 4
+  %17 = load float, ptr getelementptr inbounds ([4 x float], ptr @fd, i32 0, i64 3), align 4
+  %18 = load float, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 3), align 4
   %19 = fadd float %17, %18
   %20 = fsub float %16, %19
-  store float %20, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
+  store float %20, ptr getelementptr inbounds ([4 x float], ptr @fc, i32 0, i64 3), align 4
   ret void
 }
 
@@ -299,49 +299,45 @@ define void @reorder_alt_subTree() #0 {
 ;  c[0] = (a[0]+b[0])-d[0];
 ;  c[1] = d[1]+(a[1]+b[1]); //swapped d[1] and (a[1]+b[1])
 
-define void @reorder_alt_rightsubTree(double* nocapture %c, double* noalias nocapture readonly %a, double* noalias nocapture readonly %b, double* noalias nocapture readonly %d) {
+define void @reorder_alt_rightsubTree(ptr nocapture %c, ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %d) {
 ; CHECK-LABEL: @reorder_alt_rightsubTree(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[D:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[D:%.*]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x double>, ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    [[TMP7:%.*]] = fadd <2 x double> [[TMP4]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = fsub <2 x double> [[TMP7]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> [[TMP7]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast double* [[C:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP10]], ptr [[C:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %1 = load double, double* %a
-  %2 = load double, double* %b
+  %1 = load double, ptr %a
+  %2 = load double, ptr %b
   %3 = fadd double %1, %2
-  %4 = load double, double* %d
+  %4 = load double, ptr %d
   %5 = fsub double %3, %4
-  store double %5, double* %c
-  %6 = getelementptr inbounds double, double* %d, i64 1
-  %7 = load double, double* %6
-  %8 = getelementptr inbounds double, double* %a, i64 1
-  %9 = load double, double* %8
-  %10 = getelementptr inbounds double, double* %b, i64 1
-  %11 = load double, double* %10
+  store double %5, ptr %c
+  %6 = getelementptr inbounds double, ptr %d, i64 1
+  %7 = load double, ptr %6
+  %8 = getelementptr inbounds double, ptr %a, i64 1
+  %9 = load double, ptr %8
+  %10 = getelementptr inbounds double, ptr %b, i64 1
+  %11 = load double, ptr %10
   %12 = fadd double %9, %11
   %13 = fadd double %7, %12
-  %14 = getelementptr inbounds double, double* %c, i64 1
-  store double %13, double* %14
+  %14 = getelementptr inbounds double, ptr %c, i64 1
+  store double %13, ptr %14
   ret void
 }
 
 define void @vec_shuff_reorder() #0 {
 ; CHECK-LABEL: @vec_shuff_reorder(
-; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2) to <2 x float>*), align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2) to <2 x float>*), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr @fb, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr @fa, align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x float>, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x float>, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 2), align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP3]], i32 1
 ; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -353,25 +349,25 @@ define void @vec_shuff_reorder() #0 {
 ; CHECK-NEXT:    [[TMP15:%.*]] = fadd <4 x float> [[TMP10]], [[TMP14]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = fsub <4 x float> [[TMP10]], [[TMP14]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <4 x float> [[TMP15]], <4 x float> [[TMP16]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    store <4 x float> [[TMP17]], <4 x float>* bitcast ([4 x float]* @fc to <4 x float>*), align 4
+; CHECK-NEXT:    store <4 x float> [[TMP17]], ptr @fc, align 4
 ; CHECK-NEXT:    ret void
 ;
-  %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
-  %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
+  %1 = load float, ptr @fb, align 4
+  %2 = load float, ptr @fa, align 4
   %3 = fadd float %1, %2
-  store float %3, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
-  %4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
-  %5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
+  store float %3, ptr @fc, align 4
+  %4 = load float, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 1), align 4
+  %5 = load float, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 1), align 4
   %6 = fsub float %4, %5
-  store float %6, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
-  %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
-  %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
+  store float %6, ptr getelementptr inbounds ([4 x float], ptr @fc, i32 0, i64 1), align 4
+  %7 = load float, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 2), align 4
+  %8 = load float, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 2), align 4
   %9 = fadd float %7, %8
-  store float %9, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
-  %10 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
-  %11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
+  store float %9, ptr getelementptr inbounds ([4 x float], ptr @fc, i32 0, i64 2), align 4
+  %10 = load float, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 3), align 4
+  %11 = load float, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 3), align 4
   %12 = fsub float %10, %11
-  store float %12, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
+  store float %12, ptr getelementptr inbounds ([4 x float], ptr @fc, i32 0, i64 3), align 4
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/aggregate.ll b/llvm/test/Transforms/SLPVectorizer/X86/aggregate.ll
index 980b53bb01015..d04f3d7a50f2e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/aggregate.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/aggregate.ll
@@ -1,22 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -mtriple=x86_64-unknown-linux -mcpu=corei7 -passes=slp-vectorizer < %s | FileCheck %s
 
-%struct.S = type { i8*, i8* }
+%struct.S = type { ptr, ptr }
 
 @kS0 = common global %struct.S zeroinitializer, align 8
 
 define { i64, i64 } @getS() {
 ; CHECK-LABEL: @getS(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* bitcast (%struct.S* @kS0 to i64*), align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* bitcast (i8** getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* @kS0, i64 0, i32 1) to i64*), align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @kS0, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr getelementptr inbounds ([[STRUCT_S:%.*]], ptr @kS0, i64 0, i32 1), align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP0]], 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { i64, i64 } [[TMP2]], i64 [[TMP1]], 1
 ; CHECK-NEXT:    ret { i64, i64 } [[TMP3]]
 ;
 entry:
-  %0 = load i64, i64* bitcast (%struct.S* @kS0 to i64*), align 8
-  %1 = load i64, i64* bitcast (i8** getelementptr inbounds (%struct.S, %struct.S* @kS0, i64 0, i32 1) to i64*), align 8
+  %0 = load i64, ptr @kS0, align 8
+  %1 = load i64, ptr getelementptr inbounds (%struct.S, ptr @kS0, i64 0, i32 1), align 8
   %2 = insertvalue { i64, i64 } undef, i64 %0, 0
   %3 = insertvalue { i64, i64 } %2, i64 %1, 1
   ret { i64, i64 } %3

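A note on the aggregate.ll change: the i64 loads type-pun the two ptr fields
of %struct.S, which previously required bitcast constant expressions to give
each load an i64* operand. With opaque pointers the load type is carried by
the instruction itself, so the casts vanish; only the GEP for the second
field remains, because its offset is non-zero, and %struct.S now appears
only as the GEP source type. A sketch of why the punning still lines up,
assuming a 64-bit datalayout where ptr occupies 8 bytes:

  ; field 0 sits at offset 0: read its bits as i64 straight off @kS0
  %0 = load i64, ptr @kS0, align 8
  ; field 1 sits at offset 8: the struct-typed GEP computes that offset
  %1 = load i64, ptr getelementptr inbounds (%struct.S, ptr @kS0, i64 0, i32 1), align 8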
diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/align.ll b/llvm/test/Transforms/SLPVectorizer/X86/align.ll
index bc80d06738ceb..58bcc3f372cfb 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/align.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/align.ll
@@ -5,63 +5,58 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-macosx10.8.0"
 
 ; Simple 3-pair chain with loads and stores
-define void @test1(double* %a, double* %b, double* %c) {
+define void @test1(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[AGG_TMP_I_I_SROA_0:%.*]] = alloca [3 x double], align 16
-; CHECK-NEXT:    [[STORE1:%.*]] = getelementptr inbounds [3 x double], [3 x double]* [[AGG_TMP_I_I_SROA_0]], i64 0, i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[STORE1:%.*]] = getelementptr inbounds [3 x double], ptr [[AGG_TMP_I_I_SROA_0]], i64 0, i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[STORE1]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP4]], ptr [[STORE1]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
   %agg.tmp.i.i.sroa.0 = alloca [3 x double], align 16
-  %i0 = load double, double* %a
-  %i1 = load double, double* %b
+  %i0 = load double, ptr %a
+  %i1 = load double, ptr %b
   %mul = fmul double %i0, %i1
-  %store1 = getelementptr inbounds [3 x double], [3 x double]* %agg.tmp.i.i.sroa.0, i64 0, i64 1
-  %store2 = getelementptr inbounds [3 x double], [3 x double]* %agg.tmp.i.i.sroa.0, i64 0, i64 2
-  %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double, double* %arrayidx3, align 8
-  %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double, double* %arrayidx4, align 8
+  %store1 = getelementptr inbounds [3 x double], ptr %agg.tmp.i.i.sroa.0, i64 0, i64 1
+  %store2 = getelementptr inbounds [3 x double], ptr %agg.tmp.i.i.sroa.0, i64 0, i64 2
+  %arrayidx3 = getelementptr inbounds double, ptr %a, i64 1
+  %i3 = load double, ptr %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double, ptr %b, i64 1
+  %i4 = load double, ptr %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
-  store double %mul, double* %store1
-  store double %mul5, double* %store2, align 16
+  store double %mul, ptr %store1
+  store double %mul5, ptr %store2, align 16
   ret void
 }
 
 ; Float has 4-byte ABI alignment on x86_64. We must use the alignment of the
 ; value being loaded/stored, not the alignment of the pointer type.
 
-define void @test2(float * %a, float * %b) {
+define void @test2(ptr %a, ptr %b) {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[A:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %l0 = load float, float* %a
-  %a1 = getelementptr inbounds float, float* %a, i64 1
-  %l1 = load float, float* %a1
-  %a2 = getelementptr inbounds float, float* %a, i64 2
-  %l2 = load float, float* %a2
-  %a3 = getelementptr inbounds float, float* %a, i64 3
-  %l3 = load float, float* %a3
-  store float %l0, float* %b
-  %b1 = getelementptr inbounds float, float* %b, i64 1
-  store float %l1, float* %b1
-  %b2 = getelementptr inbounds float, float* %b, i64 2
-  store float %l2, float* %b2
-  %b3 = getelementptr inbounds float, float* %b, i64 3
-  store float %l3, float* %b3
+  %l0 = load float, ptr %a
+  %a1 = getelementptr inbounds float, ptr %a, i64 1
+  %l1 = load float, ptr %a1
+  %a2 = getelementptr inbounds float, ptr %a, i64 2
+  %l2 = load float, ptr %a2
+  %a3 = getelementptr inbounds float, ptr %a, i64 3
+  %l3 = load float, ptr %a3
+  store float %l0, ptr %b
+  %b1 = getelementptr inbounds float, ptr %b, i64 1
+  store float %l1, ptr %b1
+  %b2 = getelementptr inbounds float, ptr %b, i64 2
+  store float %l2, ptr %b2
+  %b3 = getelementptr inbounds float, ptr %b, i64 3
+  store float %l3, ptr %b3
   ret void
 }

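Two things are worth noting in align.ll. First, SLP output no longer needs
the bitcasts it used to emit to retype a scalar pointer as a vector pointer
before a wide load or store. Second, per the comment above test2, alignment
comes from the access, not from any pointee type; that now falls out
naturally, since ptr carries no pointee type from which an alignment could
be (mis)derived. A small sketch of the old and new shapes:

  ; old: cast float* to <4 x float>* before the vector load
  %0 = bitcast float* %a to <4 x float>*
  %1 = load <4 x float>, <4 x float>* %0, align 4

  ; new: the vector load takes the untyped pointer directly; align 4
  ; (float's ABI alignment) stays explicit on the instruction
  %1 = load <4 x float>, ptr %a, align 4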
diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred.ll
index 6fda6be46eec2..ecf85159efdfb 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred.ll
@@ -4,7 +4,7 @@
 define i16 @test(i16 %call37) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = load i16, i16* undef, align 2
+; CHECK-NEXT:    [[CALL:%.*]] = load i16, ptr undef, align 2
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <8 x i16> <i16 poison, i16 0, i16 0, i16 poison, i16 poison, i16 0, i16 poison, i16 0>, i16 [[CALL37:%.*]], i32 3
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[CALL]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 3, i32 5, i32 3, i32 7>
@@ -17,7 +17,7 @@ define i16 @test(i16 %call37) {
 ; CHECK-NEXT:    ret i16 [[OP_RDX]]
 ;
 entry:
-  %call = load i16, i16* undef, align 2
+  %call = load i16, ptr undef, align 2
   %0 = icmp slt i16 %call, 0
   %cond = zext i1 %0 to i16
   %1 = add i16 %cond, 0

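The alternate-cmp-swapped-pred.ll change shows the rewrite also applies to
constant pointer operands: "load i16, i16* undef" becomes "load i16, ptr
undef". Nothing semantic changes; as everywhere else in this patch, the
pointee type simply moves off the pointer operand and onto the load.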
diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-abs.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-abs.ll
index aef13f1c04ef1..c37f260fad015 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-abs.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-abs.ll
@@ -27,58 +27,58 @@ declare i8  @llvm.abs.i8 (i8, i1)
 
 define void @abs_v8i64() {
 ; SSE-LABEL: @abs_v8i64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
 ; SSE-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP1]], i1 false)
-; SSE-NEXT:    store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP2]], ptr @c64, align 8
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP3]], i1 false)
-; SSE-NEXT:    store <2 x i64> [[TMP4]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
 ; SSE-NEXT:    [[TMP6:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP5]], i1 false)
-; SSE-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
 ; SSE-NEXT:    [[TMP8:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP7]], i1 false)
-; SSE-NEXT:    store <2 x i64> [[TMP8]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP8]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @abs_v8i64(
-; SLM-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
+; SLM-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
 ; SLM-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP1]], i1 false)
-; SLM-NEXT:    store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
+; SLM-NEXT:    store <2 x i64> [[TMP2]], ptr @c64, align 8
+; SLM-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
 ; SLM-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP3]], i1 false)
-; SLM-NEXT:    store <2 x i64> [[TMP4]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
+; SLM-NEXT:    store <2 x i64> [[TMP4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SLM-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
 ; SLM-NEXT:    [[TMP6:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP5]], i1 false)
-; SLM-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
+; SLM-NEXT:    store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SLM-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
 ; SLM-NEXT:    [[TMP8:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP7]], i1 false)
-; SLM-NEXT:    store <2 x i64> [[TMP8]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
+; SLM-NEXT:    store <2 x i64> [[TMP8]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @abs_v8i64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
 ; AVX-NEXT:    [[TMP2:%.*]] = call <4 x i64> @llvm.abs.v4i64(<4 x i64> [[TMP1]], i1 false)
-; AVX-NEXT:    store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP2]], ptr @c64, align 8
+; AVX-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
 ; AVX-NEXT:    [[TMP4:%.*]] = call <4 x i64> @llvm.abs.v4i64(<4 x i64> [[TMP3]], i1 false)
-; AVX-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @abs_v8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <8 x i64> @llvm.abs.v8i64(<8 x i64> [[TMP1]], i1 false)
-; AVX512-NEXT:    store <8 x i64> [[TMP2]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
+; AVX512-NEXT:    store <8 x i64> [[TMP2]], ptr @c64, align 8
 ; AVX512-NEXT:    ret void
 ;
-  %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-  %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-  %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-  %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-  %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-  %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-  %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-  %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
+  %a0 = load i64, ptr @a64, align 8
+  %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+  %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+  %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+  %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+  %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+  %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+  %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
   %r0 = call i64 @llvm.abs.i64(i64 %a0, i1 false)
   %r1 = call i64 @llvm.abs.i64(i64 %a1, i1 false)
   %r2 = call i64 @llvm.abs.i64(i64 %a2, i1 false)
@@ -87,79 +87,79 @@ define void @abs_v8i64() {
   %r5 = call i64 @llvm.abs.i64(i64 %a5, i1 false)
   %r6 = call i64 @llvm.abs.i64(i64 %a6, i1 false)
   %r7 = call i64 @llvm.abs.i64(i64 %a7, i1 false)
-  store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-  store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-  store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-  store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-  store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-  store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-  store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-  store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+  store i64 %r0, ptr @c64, align 8
+  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @abs_v16i32() {
 ; SSE-LABEL: @abs_v16i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
 ; SSE-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP1]], i1 false)
-; SSE-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP2]], ptr @c32, align 4
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP3]], i1 false)
-; SSE-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP5]], i1 false)
-; SSE-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP8:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP7]], i1 false)
-; SSE-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @abs_v16i32(
-; SLM-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
+; SLM-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
 ; SLM-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP1]], i1 false)
-; SLM-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP2]], ptr @c32, align 4
+; SLM-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
 ; SLM-NEXT:    [[TMP4:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP3]], i1 false)
-; SLM-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SLM-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
 ; SLM-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP5]], i1 false)
-; SLM-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SLM-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
 ; SLM-NEXT:    [[TMP8:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP7]], i1 false)
-; SLM-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @abs_v16i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.abs.v8i32(<8 x i32> [[TMP1]], i1 false)
-; AVX-NEXT:    store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP2]], ptr @c32, align 4
+; AVX-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
 ; AVX-NEXT:    [[TMP4:%.*]] = call <8 x i32> @llvm.abs.v8i32(<8 x i32> [[TMP3]], i1 false)
-; AVX-NEXT:    store <8 x i32> [[TMP4]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @abs_v16i32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> [[TMP1]], i1 false)
-; AVX512-NEXT:    store <16 x i32> [[TMP2]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4
+; AVX512-NEXT:    store <16 x i32> [[TMP2]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
   %r0  = call i32 @llvm.abs.i32(i32 %a0, i1 false)
   %r1  = call i32 @llvm.abs.i32(i32 %a1, i1 false)
   %r2  = call i32 @llvm.abs.i32(i32 %a2, i1 false)
@@ -176,103 +176,103 @@ define void @abs_v16i32() {
   %r13 = call i32 @llvm.abs.i32(i32 %a13, i1 false)
   %r14 = call i32 @llvm.abs.i32(i32 %a14, i1 false)
   %r15 = call i32 @llvm.abs.i32(i32 %a15, i1 false)
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @abs_v32i16() {
 ; SSE-LABEL: @abs_v32i16(
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
 ; SSE-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[TMP1]], i1 false)
-; SSE-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP2]], ptr @c16, align 2
+; SSE-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
 ; SSE-NEXT:    [[TMP4:%.*]] = call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[TMP3]], i1 false)
-; SSE-NEXT:    store <8 x i16> [[TMP4]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP4]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
 ; SSE-NEXT:    [[TMP6:%.*]] = call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[TMP5]], i1 false)
-; SSE-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
 ; SSE-NEXT:    [[TMP8:%.*]] = call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[TMP7]], i1 false)
-; SSE-NEXT:    store <8 x i16> [[TMP8]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP8]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @abs_v32i16(
-; SLM-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
+; SLM-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
 ; SLM-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[TMP1]], i1 false)
-; SLM-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP2]], ptr @c16, align 2
+; SLM-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
 ; SLM-NEXT:    [[TMP4:%.*]] = call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[TMP3]], i1 false)
-; SLM-NEXT:    store <8 x i16> [[TMP4]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP4]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SLM-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
 ; SLM-NEXT:    [[TMP6:%.*]] = call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[TMP5]], i1 false)
-; SLM-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SLM-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
 ; SLM-NEXT:    [[TMP8:%.*]] = call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[TMP7]], i1 false)
-; SLM-NEXT:    store <8 x i16> [[TMP8]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP8]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @abs_v32i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
 ; AVX-NEXT:    [[TMP2:%.*]] = call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP1]], i1 false)
-; AVX-NEXT:    store <16 x i16> [[TMP2]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP3:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP2]], ptr @c16, align 2
+; AVX-NEXT:    [[TMP3:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
 ; AVX-NEXT:    [[TMP4:%.*]] = call <16 x i16> @llvm.abs.v16i16(<16 x i16> [[TMP3]], i1 false)
-; AVX-NEXT:    store <16 x i16> [[TMP4]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP4]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @abs_v32i16(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @a16 to <32 x i16>*), align 2
+; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <32 x i16> @llvm.abs.v32i16(<32 x i16> [[TMP1]], i1 false)
-; AVX512-NEXT:    store <32 x i16> [[TMP2]], <32 x i16>* bitcast ([32 x i16]* @c16 to <32 x i16>*), align 2
+; AVX512-NEXT:    store <32 x i16> [[TMP2]], ptr @c16, align 2
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
-  %a1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
-  %a2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
-  %a3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
-  %a4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
-  %a5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
-  %a6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
-  %a7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
-  %a8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
-  %a9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
-  %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-  %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-  %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-  %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-  %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-  %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-  %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-  %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-  %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-  %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-  %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-  %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-  %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-  %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-  %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-  %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-  %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-  %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-  %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-  %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-  %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-  %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
+  %a0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+  %a1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+  %a2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+  %a3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+  %a4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+  %a5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+  %a6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+  %a7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+  %a8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+  %a9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+  %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+  %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+  %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+  %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+  %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+  %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+  %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+  %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+  %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+  %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+  %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+  %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+  %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+  %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+  %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+  %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+  %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+  %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+  %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+  %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+  %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+  %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
   %r0  = call i16 @llvm.abs.i16(i16 %a0, i1 false)
   %r1  = call i16 @llvm.abs.i16(i16 %a1, i1 false)
   %r2  = call i16 @llvm.abs.i16(i16 %a2, i1 false)
@@ -305,151 +305,151 @@ define void @abs_v32i16() {
   %r29 = call i16 @llvm.abs.i16(i16 %a29, i1 false)
   %r30 = call i16 @llvm.abs.i16(i16 %a30, i1 false)
   %r31 = call i16 @llvm.abs.i16(i16 %a31, i1 false)
-  store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
-  store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
-  store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
-  store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
-  store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
-  store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
-  store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
-  store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
-  store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
-  store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
-  store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-  store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-  store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-  store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-  store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-  store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-  store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-  store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-  store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-  store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-  store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-  store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-  store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-  store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-  store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-  store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-  store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-  store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-  store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-  store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-  store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-  store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+  store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+  store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+  store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+  store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+  store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+  store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+  store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+  store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+  store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+  store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+  store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+  store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+  store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+  store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+  store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+  store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+  store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+  store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+  store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+  store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+  store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+  store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+  store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+  store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+  store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+  store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+  store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+  store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+  store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+  store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+  store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+  store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
   ret void
 }
 
 define void @abs_v64i8() {
 ; SSE-LABEL: @abs_v64i8(
-; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
 ; SSE-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.abs.v16i8(<16 x i8> [[TMP1]], i1 false)
-; SSE-NEXT:    store <16 x i8> [[TMP2]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP2]], ptr @c8, align 1
+; SSE-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
 ; SSE-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.abs.v16i8(<16 x i8> [[TMP3]], i1 false)
-; SSE-NEXT:    store <16 x i8> [[TMP4]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP4]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
 ; SSE-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.abs.v16i8(<16 x i8> [[TMP5]], i1 false)
-; SSE-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
 ; SSE-NEXT:    [[TMP8:%.*]] = call <16 x i8> @llvm.abs.v16i8(<16 x i8> [[TMP7]], i1 false)
-; SSE-NEXT:    store <16 x i8> [[TMP8]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP8]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @abs_v64i8(
-; SLM-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
+; SLM-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
 ; SLM-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.abs.v16i8(<16 x i8> [[TMP1]], i1 false)
-; SLM-NEXT:    store <16 x i8> [[TMP2]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP2]], ptr @c8, align 1
+; SLM-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
 ; SLM-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.abs.v16i8(<16 x i8> [[TMP3]], i1 false)
-; SLM-NEXT:    store <16 x i8> [[TMP4]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP4]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SLM-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
 ; SLM-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.abs.v16i8(<16 x i8> [[TMP5]], i1 false)
-; SLM-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SLM-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
 ; SLM-NEXT:    [[TMP8:%.*]] = call <16 x i8> @llvm.abs.v16i8(<16 x i8> [[TMP7]], i1 false)
-; SLM-NEXT:    store <16 x i8> [[TMP8]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP8]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @abs_v64i8(
-; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @a8 to <32 x i8>*), align 1
+; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
 ; AVX-NEXT:    [[TMP2:%.*]] = call <32 x i8> @llvm.abs.v32i8(<32 x i8> [[TMP1]], i1 false)
-; AVX-NEXT:    store <32 x i8> [[TMP2]], <32 x i8>* bitcast ([64 x i8]* @c8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP3:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP2]], ptr @c8, align 1
+; AVX-NEXT:    [[TMP3:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
 ; AVX-NEXT:    [[TMP4:%.*]] = call <32 x i8> @llvm.abs.v32i8(<32 x i8> [[TMP3]], i1 false)
-; AVX-NEXT:    store <32 x i8> [[TMP4]], <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP4]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @abs_v64i8(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @a8 to <64 x i8>*), align 1
+; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <64 x i8> @llvm.abs.v64i8(<64 x i8> [[TMP1]], i1 false)
-; AVX512-NEXT:    store <64 x i8> [[TMP2]], <64 x i8>* bitcast ([64 x i8]* @c8 to <64 x i8>*), align 1
+; AVX512-NEXT:    store <64 x i8> [[TMP2]], ptr @c8, align 1
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
-  %a1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
-  %a2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
-  %a3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
-  %a4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
-  %a5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
-  %a6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
-  %a7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
-  %a8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
-  %a9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
-  %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-  %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-  %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-  %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-  %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-  %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-  %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-  %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-  %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-  %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-  %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-  %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-  %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-  %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-  %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-  %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-  %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-  %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-  %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-  %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-  %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-  %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-  %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-  %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-  %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-  %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-  %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-  %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-  %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-  %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-  %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-  %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-  %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-  %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-  %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-  %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-  %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-  %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-  %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-  %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-  %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-  %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-  %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-  %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-  %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-  %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-  %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-  %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-  %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-  %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-  %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-  %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-  %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-  %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
+  %a0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+  %a1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+  %a2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+  %a3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+  %a4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+  %a5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+  %a6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+  %a7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+  %a8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+  %a9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+  %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+  %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+  %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+  %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+  %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+  %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+  %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+  %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+  %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+  %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+  %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+  %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+  %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+  %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+  %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+  %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+  %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+  %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+  %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+  %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+  %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+  %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+  %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+  %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+  %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+  %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+  %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+  %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+  %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+  %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+  %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+  %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+  %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+  %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+  %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+  %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+  %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+  %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+  %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+  %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+  %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+  %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+  %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+  %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+  %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+  %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+  %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+  %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+  %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+  %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+  %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+  %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+  %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+  %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
   %r0  = call i8 @llvm.abs.i8(i8 %a0, i1 false)
   %r1  = call i8 @llvm.abs.i8(i8 %a1, i1 false)
   %r2  = call i8 @llvm.abs.i8(i8 %a2, i1 false)
@@ -514,69 +514,69 @@ define void @abs_v64i8() {
   %r61 = call i8 @llvm.abs.i8(i8 %a61, i1 false)
   %r62 = call i8 @llvm.abs.i8(i8 %a62, i1 false)
   %r63 = call i8 @llvm.abs.i8(i8 %a63, i1 false)
-  store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
-  store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
-  store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
-  store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
-  store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
-  store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
-  store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
-  store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
-  store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
-  store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
-  store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-  store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-  store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-  store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-  store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-  store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-  store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-  store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-  store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-  store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-  store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-  store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-  store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-  store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-  store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-  store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-  store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-  store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-  store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-  store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-  store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-  store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-  store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-  store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-  store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-  store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-  store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-  store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-  store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-  store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-  store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-  store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-  store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-  store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-  store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-  store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-  store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-  store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-  store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-  store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-  store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-  store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-  store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-  store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-  store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-  store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-  store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-  store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-  store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-  store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-  store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-  store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-  store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-  store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+  store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+  store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+  store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+  store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+  store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+  store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+  store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+  store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+  store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+  store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+  store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+  store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+  store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+  store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+  store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+  store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+  store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+  store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+  store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+  store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+  store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+  store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+  store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+  store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+  store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+  store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+  store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+  store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+  store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+  store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+  store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+  store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+  store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+  store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+  store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+  store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+  store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+  store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+  store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+  store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+  store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+  store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+  store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+  store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+  store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+  store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+  store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+  store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+  store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+  store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+  store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+  store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+  store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+  store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+  store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+  store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+  store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+  store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+  store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+  store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+  store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+  store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+  store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+  store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
   ret void
 }

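The pattern applied throughout these tests is mechanical: pointee types disappear in favor of the opaque ptr type, and bitcast constant expressions between pointer types fold away entirely, because with opaque pointers there is no pointer-to-pointer conversion left to express. A minimal before/after sketch (using a hypothetical global @g, not taken from these tests):

  ; typed pointers (before): the load needs a bitcast to retype the global
  %v = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @g to <16 x i8>*), align 1
  ; opaque pointers (after): the global is already a ptr, so the bitcast folds away
  %v = load <16 x i8>, ptr @g, align 1

The loads and stores are semantically unchanged; only the pointer syntax differs, which is why the conversion is NFC.
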
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-saddo.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-saddo.ll
index 1fb92aa41d6d5..8d7dd9b9621c8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-saddo.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-saddo.ll
@@ -27,22 +27,22 @@ declare {i8 , i1} @llvm.sadd.with.overflow.i8 (i8 , i8 )
 
 define void @add_v8i64() {
 ; CHECK-LABEL: @add_v8i64(
-; CHECK-NEXT:    [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-; CHECK-NEXT:    [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-; CHECK-NEXT:    [[A2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-; CHECK-NEXT:    [[A3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-; CHECK-NEXT:    [[A4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-; CHECK-NEXT:    [[A5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-; CHECK-NEXT:    [[A6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-; CHECK-NEXT:    [[A7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-; CHECK-NEXT:    [[B0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-; CHECK-NEXT:    [[B1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-; CHECK-NEXT:    [[B2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-; CHECK-NEXT:    [[B3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-; CHECK-NEXT:    [[B4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-; CHECK-NEXT:    [[B5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-; CHECK-NEXT:    [[B6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-; CHECK-NEXT:    [[B7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+; CHECK-NEXT:    [[A0:%.*]] = load i64, ptr @a64, align 8
+; CHECK-NEXT:    [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+; CHECK-NEXT:    [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; CHECK-NEXT:    [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+; CHECK-NEXT:    [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; CHECK-NEXT:    [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+; CHECK-NEXT:    [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; CHECK-NEXT:    [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+; CHECK-NEXT:    [[B0:%.*]] = load i64, ptr @b64, align 8
+; CHECK-NEXT:    [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+; CHECK-NEXT:    [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+; CHECK-NEXT:    [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+; CHECK-NEXT:    [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+; CHECK-NEXT:    [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+; CHECK-NEXT:    [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+; CHECK-NEXT:    [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
 ; CHECK-NEXT:    [[C0:%.*]] = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 [[A0]], i64 [[B0]])
 ; CHECK-NEXT:    [[C1:%.*]] = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 [[A1]], i64 [[B1]])
 ; CHECK-NEXT:    [[C2:%.*]] = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 [[A2]], i64 [[B2]])
@@ -59,32 +59,32 @@ define void @add_v8i64() {
 ; CHECK-NEXT:    [[R5:%.*]] = extractvalue { i64, i1 } [[C5]], 0
 ; CHECK-NEXT:    [[R6:%.*]] = extractvalue { i64, i1 } [[C6]], 0
 ; CHECK-NEXT:    [[R7:%.*]] = extractvalue { i64, i1 } [[C7]], 0
-; CHECK-NEXT:    store i64 [[R0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-; CHECK-NEXT:    store i64 [[R1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-; CHECK-NEXT:    store i64 [[R2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-; CHECK-NEXT:    store i64 [[R3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-; CHECK-NEXT:    store i64 [[R4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-; CHECK-NEXT:    store i64 [[R5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-; CHECK-NEXT:    store i64 [[R6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-; CHECK-NEXT:    store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+; CHECK-NEXT:    store i64 [[R0]], ptr @c64, align 8
+; CHECK-NEXT:    store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+; CHECK-NEXT:    store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; CHECK-NEXT:    store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+; CHECK-NEXT:    store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; CHECK-NEXT:    store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+; CHECK-NEXT:    store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+; CHECK-NEXT:    store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-  %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-  %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-  %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-  %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-  %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-  %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-  %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-  %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-  %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-  %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-  %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-  %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-  %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-  %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-  %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+  %a0 = load i64, ptr @a64, align 8
+  %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+  %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+  %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+  %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+  %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+  %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+  %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+  %b0 = load i64, ptr @b64, align 8
+  %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+  %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+  %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+  %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+  %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+  %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+  %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
   %c0 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a0, i64 %b0)
   %c1 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a1, i64 %b1)
   %c2 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a2, i64 %b2)
@@ -101,51 +101,51 @@ define void @add_v8i64() {
   %r5 = extractvalue {i64, i1} %c5, 0
   %r6 = extractvalue {i64, i1} %c6, 0
   %r7 = extractvalue {i64, i1} %c7, 0
-  store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-  store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-  store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-  store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-  store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-  store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-  store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-  store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+  store i64 %r0, ptr @c64, align 8
+  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
   ret void
 }
 
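One visible knock-on effect in the updated FileCheck lines above: a getelementptr that only selects element 0 now constant-folds to the global itself, since the result no longer needs a distinct pointer type. Compare the first load of @a64 in the function above:

  ; before: an index-0 GEP was required just to produce an i64*
  [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
  ; after: the zero-offset GEP folds to the bare global
  [[A0:%.*]] = load i64, ptr @a64, align 8

The remaining elements keep an explicit getelementptr because their offsets are nonzero.
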
 define void @add_v16i32() {
 ; CHECK-LABEL: @add_v16i32(
-; CHECK-NEXT:    [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4
-; CHECK-NEXT:    [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4
-; CHECK-NEXT:    [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4
-; CHECK-NEXT:    [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4
-; CHECK-NEXT:    [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4
-; CHECK-NEXT:    [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4
-; CHECK-NEXT:    [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4
-; CHECK-NEXT:    [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4
-; CHECK-NEXT:    [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4
-; CHECK-NEXT:    [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4
-; CHECK-NEXT:    [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-; CHECK-NEXT:    [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-; CHECK-NEXT:    [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-; CHECK-NEXT:    [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-; CHECK-NEXT:    [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-; CHECK-NEXT:    [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-; CHECK-NEXT:    [[B0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0), align 4
-; CHECK-NEXT:    [[B1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1), align 4
-; CHECK-NEXT:    [[B2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2), align 4
-; CHECK-NEXT:    [[B3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3), align 4
-; CHECK-NEXT:    [[B4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4), align 4
-; CHECK-NEXT:    [[B5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5), align 4
-; CHECK-NEXT:    [[B6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6), align 4
-; CHECK-NEXT:    [[B7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7), align 4
-; CHECK-NEXT:    [[B8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8), align 4
-; CHECK-NEXT:    [[B9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9), align 4
-; CHECK-NEXT:    [[B10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-; CHECK-NEXT:    [[B11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-; CHECK-NEXT:    [[B12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-; CHECK-NEXT:    [[B13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-; CHECK-NEXT:    [[B14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-; CHECK-NEXT:    [[B15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+; CHECK-NEXT:    [[A0:%.*]] = load i32, ptr @a32, align 4
+; CHECK-NEXT:    [[A1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[A2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[A3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3), align 4
+; CHECK-NEXT:    [[A4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; CHECK-NEXT:    [[A5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5), align 4
+; CHECK-NEXT:    [[A6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6), align 4
+; CHECK-NEXT:    [[A7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7), align 4
+; CHECK-NEXT:    [[A8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; CHECK-NEXT:    [[A9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9), align 4
+; CHECK-NEXT:    [[A10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+; CHECK-NEXT:    [[A11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+; CHECK-NEXT:    [[A12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; CHECK-NEXT:    [[A13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+; CHECK-NEXT:    [[A14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+; CHECK-NEXT:    [[A15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+; CHECK-NEXT:    [[B0:%.*]] = load i32, ptr @b32, align 4
+; CHECK-NEXT:    [[B1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[B2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[B3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3), align 4
+; CHECK-NEXT:    [[B4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
+; CHECK-NEXT:    [[B5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5), align 4
+; CHECK-NEXT:    [[B6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6), align 4
+; CHECK-NEXT:    [[B7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7), align 4
+; CHECK-NEXT:    [[B8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
+; CHECK-NEXT:    [[B9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9), align 4
+; CHECK-NEXT:    [[B10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+; CHECK-NEXT:    [[B11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+; CHECK-NEXT:    [[B12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+; CHECK-NEXT:    [[B13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+; CHECK-NEXT:    [[B14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+; CHECK-NEXT:    [[B15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
 ; CHECK-NEXT:    [[C0:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[A0]], i32 [[B0]])
 ; CHECK-NEXT:    [[C1:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[A1]], i32 [[B1]])
 ; CHECK-NEXT:    [[C2:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[A2]], i32 [[B2]])
@@ -178,56 +178,56 @@ define void @add_v16i32() {
 ; CHECK-NEXT:    [[R13:%.*]] = extractvalue { i32, i1 } [[C13]], 0
 ; CHECK-NEXT:    [[R14:%.*]] = extractvalue { i32, i1 } [[C14]], 0
 ; CHECK-NEXT:    [[R15:%.*]] = extractvalue { i32, i1 } [[C15]], 0
-; CHECK-NEXT:    store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4
-; CHECK-NEXT:    store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4
-; CHECK-NEXT:    store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4
-; CHECK-NEXT:    store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4
-; CHECK-NEXT:    store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4
-; CHECK-NEXT:    store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4
-; CHECK-NEXT:    store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4
-; CHECK-NEXT:    store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4
-; CHECK-NEXT:    store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4
-; CHECK-NEXT:    store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4
-; CHECK-NEXT:    store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-; CHECK-NEXT:    store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-; CHECK-NEXT:    store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-; CHECK-NEXT:    store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-; CHECK-NEXT:    store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-; CHECK-NEXT:    store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+; CHECK-NEXT:    store i32 [[R0]], ptr @c32, align 4
+; CHECK-NEXT:    store i32 [[R1]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1), align 4
+; CHECK-NEXT:    store i32 [[R2]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2), align 4
+; CHECK-NEXT:    store i32 [[R3]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3), align 4
+; CHECK-NEXT:    store i32 [[R4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; CHECK-NEXT:    store i32 [[R5]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5), align 4
+; CHECK-NEXT:    store i32 [[R6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6), align 4
+; CHECK-NEXT:    store i32 [[R7]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7), align 4
+; CHECK-NEXT:    store i32 [[R8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; CHECK-NEXT:    store i32 [[R9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9), align 4
+; CHECK-NEXT:    store i32 [[R10]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+; CHECK-NEXT:    store i32 [[R11]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+; CHECK-NEXT:    store i32 [[R12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+; CHECK-NEXT:    store i32 [[R13]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+; CHECK-NEXT:    store i32 [[R14]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+; CHECK-NEXT:    store i32 [[R15]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
 ; CHECK-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-  %b0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
-  %b1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
-  %b2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
-  %b3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
-  %b4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
-  %b5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
-  %b6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
-  %b7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
-  %b8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
-  %b9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
-  %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-  %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-  %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-  %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-  %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-  %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+  %b0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+  %b1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+  %b2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+  %b3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+  %b4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+  %b5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+  %b6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+  %b7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+  %b8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+  %b9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+  %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+  %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+  %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+  %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+  %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+  %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
   %c0  = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a0 , i32 %b0 )
   %c1  = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a1 , i32 %b1 )
   %c2  = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a2 , i32 %b2 )
@@ -260,91 +260,91 @@ define void @add_v16i32() {
   %r13 = extractvalue {i32, i1} %c13, 0
   %r14 = extractvalue {i32, i1} %c14, 0
   %r15 = extractvalue {i32, i1} %c15, 0
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @add_v32i16() {
 ; CHECK-LABEL: @add_v32i16(
-; CHECK-NEXT:    [[A0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0), align 2
-; CHECK-NEXT:    [[A1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1), align 2
-; CHECK-NEXT:    [[A2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2), align 2
-; CHECK-NEXT:    [[A3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3), align 2
-; CHECK-NEXT:    [[A4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4), align 2
-; CHECK-NEXT:    [[A5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5), align 2
-; CHECK-NEXT:    [[A6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6), align 2
-; CHECK-NEXT:    [[A7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7), align 2
-; CHECK-NEXT:    [[A8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8), align 2
-; CHECK-NEXT:    [[A9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9), align 2
-; CHECK-NEXT:    [[A10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-; CHECK-NEXT:    [[A11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-; CHECK-NEXT:    [[A12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-; CHECK-NEXT:    [[A13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-; CHECK-NEXT:    [[A14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-; CHECK-NEXT:    [[A15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-; CHECK-NEXT:    [[A16:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-; CHECK-NEXT:    [[A17:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-; CHECK-NEXT:    [[A18:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-; CHECK-NEXT:    [[A19:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-; CHECK-NEXT:    [[A20:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-; CHECK-NEXT:    [[A21:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-; CHECK-NEXT:    [[A22:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-; CHECK-NEXT:    [[A23:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-; CHECK-NEXT:    [[A24:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-; CHECK-NEXT:    [[A25:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-; CHECK-NEXT:    [[A26:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-; CHECK-NEXT:    [[A27:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-; CHECK-NEXT:    [[A28:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-; CHECK-NEXT:    [[A29:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-; CHECK-NEXT:    [[A30:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-; CHECK-NEXT:    [[A31:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-; CHECK-NEXT:    [[B0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0), align 2
-; CHECK-NEXT:    [[B1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1), align 2
-; CHECK-NEXT:    [[B2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2), align 2
-; CHECK-NEXT:    [[B3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3), align 2
-; CHECK-NEXT:    [[B4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4), align 2
-; CHECK-NEXT:    [[B5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5), align 2
-; CHECK-NEXT:    [[B6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6), align 2
-; CHECK-NEXT:    [[B7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7), align 2
-; CHECK-NEXT:    [[B8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8), align 2
-; CHECK-NEXT:    [[B9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9), align 2
-; CHECK-NEXT:    [[B10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-; CHECK-NEXT:    [[B11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-; CHECK-NEXT:    [[B12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-; CHECK-NEXT:    [[B13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-; CHECK-NEXT:    [[B14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-; CHECK-NEXT:    [[B15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-; CHECK-NEXT:    [[B16:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-; CHECK-NEXT:    [[B17:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-; CHECK-NEXT:    [[B18:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-; CHECK-NEXT:    [[B19:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-; CHECK-NEXT:    [[B20:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-; CHECK-NEXT:    [[B21:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-; CHECK-NEXT:    [[B22:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-; CHECK-NEXT:    [[B23:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-; CHECK-NEXT:    [[B24:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-; CHECK-NEXT:    [[B25:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-; CHECK-NEXT:    [[B26:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-; CHECK-NEXT:    [[B27:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-; CHECK-NEXT:    [[B28:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-; CHECK-NEXT:    [[B29:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-; CHECK-NEXT:    [[B30:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-; CHECK-NEXT:    [[B31:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+; CHECK-NEXT:    [[A0:%.*]] = load i16, ptr @a16, align 2
+; CHECK-NEXT:    [[A1:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1), align 2
+; CHECK-NEXT:    [[A2:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2), align 2
+; CHECK-NEXT:    [[A3:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3), align 2
+; CHECK-NEXT:    [[A4:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4), align 2
+; CHECK-NEXT:    [[A5:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5), align 2
+; CHECK-NEXT:    [[A6:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6), align 2
+; CHECK-NEXT:    [[A7:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7), align 2
+; CHECK-NEXT:    [[A8:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; CHECK-NEXT:    [[A9:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9), align 2
+; CHECK-NEXT:    [[A10:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+; CHECK-NEXT:    [[A11:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+; CHECK-NEXT:    [[A12:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+; CHECK-NEXT:    [[A13:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+; CHECK-NEXT:    [[A14:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+; CHECK-NEXT:    [[A15:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+; CHECK-NEXT:    [[A16:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; CHECK-NEXT:    [[A17:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+; CHECK-NEXT:    [[A18:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+; CHECK-NEXT:    [[A19:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+; CHECK-NEXT:    [[A20:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+; CHECK-NEXT:    [[A21:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+; CHECK-NEXT:    [[A22:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+; CHECK-NEXT:    [[A23:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+; CHECK-NEXT:    [[A24:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; CHECK-NEXT:    [[A25:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+; CHECK-NEXT:    [[A26:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+; CHECK-NEXT:    [[A27:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+; CHECK-NEXT:    [[A28:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+; CHECK-NEXT:    [[A29:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+; CHECK-NEXT:    [[A30:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+; CHECK-NEXT:    [[A31:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+; CHECK-NEXT:    [[B0:%.*]] = load i16, ptr @b16, align 2
+; CHECK-NEXT:    [[B1:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1), align 2
+; CHECK-NEXT:    [[B2:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2), align 2
+; CHECK-NEXT:    [[B3:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3), align 2
+; CHECK-NEXT:    [[B4:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4), align 2
+; CHECK-NEXT:    [[B5:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5), align 2
+; CHECK-NEXT:    [[B6:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6), align 2
+; CHECK-NEXT:    [[B7:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7), align 2
+; CHECK-NEXT:    [[B8:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
+; CHECK-NEXT:    [[B9:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9), align 2
+; CHECK-NEXT:    [[B10:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+; CHECK-NEXT:    [[B11:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+; CHECK-NEXT:    [[B12:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+; CHECK-NEXT:    [[B13:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+; CHECK-NEXT:    [[B14:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+; CHECK-NEXT:    [[B15:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+; CHECK-NEXT:    [[B16:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+; CHECK-NEXT:    [[B17:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+; CHECK-NEXT:    [[B18:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+; CHECK-NEXT:    [[B19:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+; CHECK-NEXT:    [[B20:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+; CHECK-NEXT:    [[B21:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+; CHECK-NEXT:    [[B22:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+; CHECK-NEXT:    [[B23:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+; CHECK-NEXT:    [[B24:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+; CHECK-NEXT:    [[B25:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+; CHECK-NEXT:    [[B26:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+; CHECK-NEXT:    [[B27:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+; CHECK-NEXT:    [[B28:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+; CHECK-NEXT:    [[B29:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+; CHECK-NEXT:    [[B30:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+; CHECK-NEXT:    [[B31:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
 ; CHECK-NEXT:    [[C0:%.*]] = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 [[A0]], i16 [[B0]])
 ; CHECK-NEXT:    [[C1:%.*]] = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 [[A1]], i16 [[B1]])
 ; CHECK-NEXT:    [[C2:%.*]] = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 [[A2]], i16 [[B2]])
@@ -409,104 +409,104 @@ define void @add_v32i16() {
 ; CHECK-NEXT:    [[R29:%.*]] = extractvalue { i16, i1 } [[C29]], 0
 ; CHECK-NEXT:    [[R30:%.*]] = extractvalue { i16, i1 } [[C30]], 0
 ; CHECK-NEXT:    [[R31:%.*]] = extractvalue { i16, i1 } [[C31]], 0
-; CHECK-NEXT:    store i16 [[R0]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0), align 2
-; CHECK-NEXT:    store i16 [[R1]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1), align 2
-; CHECK-NEXT:    store i16 [[R2]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2), align 2
-; CHECK-NEXT:    store i16 [[R3]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3), align 2
-; CHECK-NEXT:    store i16 [[R4]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4), align 2
-; CHECK-NEXT:    store i16 [[R5]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5), align 2
-; CHECK-NEXT:    store i16 [[R6]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6), align 2
-; CHECK-NEXT:    store i16 [[R7]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7), align 2
-; CHECK-NEXT:    store i16 [[R8]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8), align 2
-; CHECK-NEXT:    store i16 [[R9]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9), align 2
-; CHECK-NEXT:    store i16 [[R10]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-; CHECK-NEXT:    store i16 [[R11]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-; CHECK-NEXT:    store i16 [[R12]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-; CHECK-NEXT:    store i16 [[R13]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-; CHECK-NEXT:    store i16 [[R14]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-; CHECK-NEXT:    store i16 [[R15]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-; CHECK-NEXT:    store i16 [[R16]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-; CHECK-NEXT:    store i16 [[R17]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-; CHECK-NEXT:    store i16 [[R18]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-; CHECK-NEXT:    store i16 [[R19]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-; CHECK-NEXT:    store i16 [[R20]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-; CHECK-NEXT:    store i16 [[R21]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-; CHECK-NEXT:    store i16 [[R22]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-; CHECK-NEXT:    store i16 [[R23]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-; CHECK-NEXT:    store i16 [[R24]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-; CHECK-NEXT:    store i16 [[R25]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-; CHECK-NEXT:    store i16 [[R26]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-; CHECK-NEXT:    store i16 [[R27]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-; CHECK-NEXT:    store i16 [[R28]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-; CHECK-NEXT:    store i16 [[R29]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-; CHECK-NEXT:    store i16 [[R30]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-; CHECK-NEXT:    store i16 [[R31]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+; CHECK-NEXT:    store i16 [[R0]], ptr @c16, align 2
+; CHECK-NEXT:    store i16 [[R1]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1), align 2
+; CHECK-NEXT:    store i16 [[R2]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2), align 2
+; CHECK-NEXT:    store i16 [[R3]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3), align 2
+; CHECK-NEXT:    store i16 [[R4]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4), align 2
+; CHECK-NEXT:    store i16 [[R5]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5), align 2
+; CHECK-NEXT:    store i16 [[R6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6), align 2
+; CHECK-NEXT:    store i16 [[R7]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7), align 2
+; CHECK-NEXT:    store i16 [[R8]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; CHECK-NEXT:    store i16 [[R9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9), align 2
+; CHECK-NEXT:    store i16 [[R10]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+; CHECK-NEXT:    store i16 [[R11]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+; CHECK-NEXT:    store i16 [[R12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+; CHECK-NEXT:    store i16 [[R13]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+; CHECK-NEXT:    store i16 [[R14]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+; CHECK-NEXT:    store i16 [[R15]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+; CHECK-NEXT:    store i16 [[R16]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; CHECK-NEXT:    store i16 [[R17]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+; CHECK-NEXT:    store i16 [[R18]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+; CHECK-NEXT:    store i16 [[R19]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+; CHECK-NEXT:    store i16 [[R20]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+; CHECK-NEXT:    store i16 [[R21]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+; CHECK-NEXT:    store i16 [[R22]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+; CHECK-NEXT:    store i16 [[R23]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+; CHECK-NEXT:    store i16 [[R24]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+; CHECK-NEXT:    store i16 [[R25]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+; CHECK-NEXT:    store i16 [[R26]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+; CHECK-NEXT:    store i16 [[R27]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+; CHECK-NEXT:    store i16 [[R28]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+; CHECK-NEXT:    store i16 [[R29]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+; CHECK-NEXT:    store i16 [[R30]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+; CHECK-NEXT:    store i16 [[R31]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
 ; CHECK-NEXT:    ret void
 ;
-  %a0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
-  %a1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
-  %a2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
-  %a3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
-  %a4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
-  %a5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
-  %a6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
-  %a7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
-  %a8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
-  %a9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
-  %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-  %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-  %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-  %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-  %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-  %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-  %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-  %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-  %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-  %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-  %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-  %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-  %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-  %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-  %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-  %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-  %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-  %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-  %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-  %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-  %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-  %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-  %b0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
-  %b1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
-  %b2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
-  %b3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
-  %b4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
-  %b5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
-  %b6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
-  %b7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
-  %b8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
-  %b9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
-  %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-  %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-  %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-  %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-  %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-  %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-  %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-  %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-  %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-  %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-  %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-  %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-  %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-  %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-  %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-  %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-  %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-  %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-  %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-  %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-  %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-  %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+  %a0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+  %a1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+  %a2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+  %a3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+  %a4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+  %a5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+  %a6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+  %a7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+  %a8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+  %a9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+  %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+  %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+  %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+  %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+  %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+  %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+  %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+  %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+  %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+  %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+  %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+  %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+  %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+  %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+  %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+  %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+  %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+  %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+  %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+  %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+  %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+  %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+  %b0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+  %b1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+  %b2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+  %b3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+  %b4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+  %b5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+  %b6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+  %b7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+  %b8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+  %b9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+  %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+  %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+  %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+  %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+  %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+  %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+  %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+  %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+  %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+  %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+  %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+  %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+  %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+  %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+  %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+  %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+  %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+  %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+  %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+  %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+  %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+  %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
   %c0  = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 %a0 , i16 %b0 )
   %c1  = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 %a1 , i16 %b1 )
   %c2  = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 %a2 , i16 %b2 )
@@ -571,171 +571,171 @@ define void @add_v32i16() {
   %r29 = extractvalue {i16, i1} %c29, 0
   %r30 = extractvalue {i16, i1} %c30, 0
   %r31 = extractvalue {i16, i1} %c31, 0
-  store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
-  store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
-  store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
-  store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
-  store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
-  store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
-  store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
-  store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
-  store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
-  store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
-  store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-  store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-  store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-  store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-  store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-  store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-  store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-  store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-  store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-  store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-  store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-  store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-  store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-  store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-  store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-  store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-  store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-  store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-  store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-  store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-  store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-  store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+  store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+  store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+  store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+  store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+  store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+  store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+  store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+  store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+  store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+  store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+  store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+  store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+  store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+  store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+  store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+  store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+  store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+  store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+  store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+  store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+  store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+  store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+  store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+  store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+  store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+  store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+  store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+  store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+  store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+  store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+  store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+  store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
   ret void
 }
 
 define void @add_v64i8() {
 ; CHECK-LABEL: @add_v64i8(
-; CHECK-NEXT:    [[A0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0), align 1
-; CHECK-NEXT:    [[A1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1), align 1
-; CHECK-NEXT:    [[A2:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2), align 1
-; CHECK-NEXT:    [[A3:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3), align 1
-; CHECK-NEXT:    [[A4:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4), align 1
-; CHECK-NEXT:    [[A5:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5), align 1
-; CHECK-NEXT:    [[A6:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6), align 1
-; CHECK-NEXT:    [[A7:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7), align 1
-; CHECK-NEXT:    [[A8:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8), align 1
-; CHECK-NEXT:    [[A9:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9), align 1
-; CHECK-NEXT:    [[A10:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-; CHECK-NEXT:    [[A11:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-; CHECK-NEXT:    [[A12:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-; CHECK-NEXT:    [[A13:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-; CHECK-NEXT:    [[A14:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-; CHECK-NEXT:    [[A15:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-; CHECK-NEXT:    [[A16:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-; CHECK-NEXT:    [[A17:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-; CHECK-NEXT:    [[A18:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-; CHECK-NEXT:    [[A19:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-; CHECK-NEXT:    [[A20:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-; CHECK-NEXT:    [[A21:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-; CHECK-NEXT:    [[A22:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-; CHECK-NEXT:    [[A23:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-; CHECK-NEXT:    [[A24:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-; CHECK-NEXT:    [[A25:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-; CHECK-NEXT:    [[A26:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-; CHECK-NEXT:    [[A27:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-; CHECK-NEXT:    [[A28:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-; CHECK-NEXT:    [[A29:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-; CHECK-NEXT:    [[A30:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-; CHECK-NEXT:    [[A31:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-; CHECK-NEXT:    [[A32:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-; CHECK-NEXT:    [[A33:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-; CHECK-NEXT:    [[A34:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-; CHECK-NEXT:    [[A35:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-; CHECK-NEXT:    [[A36:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-; CHECK-NEXT:    [[A37:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-; CHECK-NEXT:    [[A38:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-; CHECK-NEXT:    [[A39:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-; CHECK-NEXT:    [[A40:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-; CHECK-NEXT:    [[A41:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-; CHECK-NEXT:    [[A42:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-; CHECK-NEXT:    [[A43:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-; CHECK-NEXT:    [[A44:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-; CHECK-NEXT:    [[A45:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-; CHECK-NEXT:    [[A46:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-; CHECK-NEXT:    [[A47:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-; CHECK-NEXT:    [[A48:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-; CHECK-NEXT:    [[A49:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-; CHECK-NEXT:    [[A50:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-; CHECK-NEXT:    [[A51:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-; CHECK-NEXT:    [[A52:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-; CHECK-NEXT:    [[A53:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-; CHECK-NEXT:    [[A54:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-; CHECK-NEXT:    [[A55:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-; CHECK-NEXT:    [[A56:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-; CHECK-NEXT:    [[A57:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-; CHECK-NEXT:    [[A58:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-; CHECK-NEXT:    [[A59:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-; CHECK-NEXT:    [[A60:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-; CHECK-NEXT:    [[A61:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-; CHECK-NEXT:    [[A62:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-; CHECK-NEXT:    [[A63:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-; CHECK-NEXT:    [[B0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0), align 1
-; CHECK-NEXT:    [[B1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1), align 1
-; CHECK-NEXT:    [[B2:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2), align 1
-; CHECK-NEXT:    [[B3:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3), align 1
-; CHECK-NEXT:    [[B4:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4), align 1
-; CHECK-NEXT:    [[B5:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5), align 1
-; CHECK-NEXT:    [[B6:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6), align 1
-; CHECK-NEXT:    [[B7:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7), align 1
-; CHECK-NEXT:    [[B8:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8), align 1
-; CHECK-NEXT:    [[B9:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9), align 1
-; CHECK-NEXT:    [[B10:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-; CHECK-NEXT:    [[B11:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-; CHECK-NEXT:    [[B12:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-; CHECK-NEXT:    [[B13:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-; CHECK-NEXT:    [[B14:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-; CHECK-NEXT:    [[B15:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-; CHECK-NEXT:    [[B16:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-; CHECK-NEXT:    [[B17:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-; CHECK-NEXT:    [[B18:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-; CHECK-NEXT:    [[B19:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-; CHECK-NEXT:    [[B20:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-; CHECK-NEXT:    [[B21:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-; CHECK-NEXT:    [[B22:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-; CHECK-NEXT:    [[B23:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-; CHECK-NEXT:    [[B24:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-; CHECK-NEXT:    [[B25:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-; CHECK-NEXT:    [[B26:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-; CHECK-NEXT:    [[B27:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-; CHECK-NEXT:    [[B28:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-; CHECK-NEXT:    [[B29:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-; CHECK-NEXT:    [[B30:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-; CHECK-NEXT:    [[B31:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-; CHECK-NEXT:    [[B32:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-; CHECK-NEXT:    [[B33:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-; CHECK-NEXT:    [[B34:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-; CHECK-NEXT:    [[B35:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-; CHECK-NEXT:    [[B36:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-; CHECK-NEXT:    [[B37:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-; CHECK-NEXT:    [[B38:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-; CHECK-NEXT:    [[B39:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-; CHECK-NEXT:    [[B40:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-; CHECK-NEXT:    [[B41:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-; CHECK-NEXT:    [[B42:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-; CHECK-NEXT:    [[B43:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-; CHECK-NEXT:    [[B44:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-; CHECK-NEXT:    [[B45:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-; CHECK-NEXT:    [[B46:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-; CHECK-NEXT:    [[B47:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-; CHECK-NEXT:    [[B48:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-; CHECK-NEXT:    [[B49:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-; CHECK-NEXT:    [[B50:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-; CHECK-NEXT:    [[B51:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-; CHECK-NEXT:    [[B52:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-; CHECK-NEXT:    [[B53:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-; CHECK-NEXT:    [[B54:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-; CHECK-NEXT:    [[B55:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-; CHECK-NEXT:    [[B56:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-; CHECK-NEXT:    [[B57:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-; CHECK-NEXT:    [[B58:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-; CHECK-NEXT:    [[B59:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-; CHECK-NEXT:    [[B60:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-; CHECK-NEXT:    [[B61:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-; CHECK-NEXT:    [[B62:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-; CHECK-NEXT:    [[B63:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+; CHECK-NEXT:    [[A0:%.*]] = load i8, ptr @a8, align 1
+; CHECK-NEXT:    [[A1:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1), align 1
+; CHECK-NEXT:    [[A2:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2), align 1
+; CHECK-NEXT:    [[A3:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3), align 1
+; CHECK-NEXT:    [[A4:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4), align 1
+; CHECK-NEXT:    [[A5:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5), align 1
+; CHECK-NEXT:    [[A6:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6), align 1
+; CHECK-NEXT:    [[A7:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7), align 1
+; CHECK-NEXT:    [[A8:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8), align 1
+; CHECK-NEXT:    [[A9:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9), align 1
+; CHECK-NEXT:    [[A10:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+; CHECK-NEXT:    [[A11:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+; CHECK-NEXT:    [[A12:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+; CHECK-NEXT:    [[A13:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+; CHECK-NEXT:    [[A14:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+; CHECK-NEXT:    [[A15:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+; CHECK-NEXT:    [[A16:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; CHECK-NEXT:    [[A17:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+; CHECK-NEXT:    [[A18:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+; CHECK-NEXT:    [[A19:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+; CHECK-NEXT:    [[A20:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+; CHECK-NEXT:    [[A21:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+; CHECK-NEXT:    [[A22:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+; CHECK-NEXT:    [[A23:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+; CHECK-NEXT:    [[A24:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+; CHECK-NEXT:    [[A25:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+; CHECK-NEXT:    [[A26:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+; CHECK-NEXT:    [[A27:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+; CHECK-NEXT:    [[A28:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+; CHECK-NEXT:    [[A29:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+; CHECK-NEXT:    [[A30:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+; CHECK-NEXT:    [[A31:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+; CHECK-NEXT:    [[A32:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; CHECK-NEXT:    [[A33:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+; CHECK-NEXT:    [[A34:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+; CHECK-NEXT:    [[A35:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+; CHECK-NEXT:    [[A36:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+; CHECK-NEXT:    [[A37:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+; CHECK-NEXT:    [[A38:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+; CHECK-NEXT:    [[A39:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+; CHECK-NEXT:    [[A40:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+; CHECK-NEXT:    [[A41:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+; CHECK-NEXT:    [[A42:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+; CHECK-NEXT:    [[A43:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+; CHECK-NEXT:    [[A44:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+; CHECK-NEXT:    [[A45:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+; CHECK-NEXT:    [[A46:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+; CHECK-NEXT:    [[A47:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+; CHECK-NEXT:    [[A48:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; CHECK-NEXT:    [[A49:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+; CHECK-NEXT:    [[A50:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+; CHECK-NEXT:    [[A51:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+; CHECK-NEXT:    [[A52:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+; CHECK-NEXT:    [[A53:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+; CHECK-NEXT:    [[A54:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+; CHECK-NEXT:    [[A55:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+; CHECK-NEXT:    [[A56:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+; CHECK-NEXT:    [[A57:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+; CHECK-NEXT:    [[A58:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+; CHECK-NEXT:    [[A59:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+; CHECK-NEXT:    [[A60:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+; CHECK-NEXT:    [[A61:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+; CHECK-NEXT:    [[A62:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+; CHECK-NEXT:    [[A63:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+; CHECK-NEXT:    [[B0:%.*]] = load i8, ptr @b8, align 1
+; CHECK-NEXT:    [[B1:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1), align 1
+; CHECK-NEXT:    [[B2:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2), align 1
+; CHECK-NEXT:    [[B3:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3), align 1
+; CHECK-NEXT:    [[B4:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4), align 1
+; CHECK-NEXT:    [[B5:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5), align 1
+; CHECK-NEXT:    [[B6:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6), align 1
+; CHECK-NEXT:    [[B7:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7), align 1
+; CHECK-NEXT:    [[B8:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8), align 1
+; CHECK-NEXT:    [[B9:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9), align 1
+; CHECK-NEXT:    [[B10:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+; CHECK-NEXT:    [[B11:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+; CHECK-NEXT:    [[B12:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+; CHECK-NEXT:    [[B13:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+; CHECK-NEXT:    [[B14:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+; CHECK-NEXT:    [[B15:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+; CHECK-NEXT:    [[B16:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+; CHECK-NEXT:    [[B17:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+; CHECK-NEXT:    [[B18:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+; CHECK-NEXT:    [[B19:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+; CHECK-NEXT:    [[B20:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+; CHECK-NEXT:    [[B21:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+; CHECK-NEXT:    [[B22:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+; CHECK-NEXT:    [[B23:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+; CHECK-NEXT:    [[B24:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+; CHECK-NEXT:    [[B25:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+; CHECK-NEXT:    [[B26:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+; CHECK-NEXT:    [[B27:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+; CHECK-NEXT:    [[B28:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+; CHECK-NEXT:    [[B29:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+; CHECK-NEXT:    [[B30:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+; CHECK-NEXT:    [[B31:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+; CHECK-NEXT:    [[B32:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+; CHECK-NEXT:    [[B33:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+; CHECK-NEXT:    [[B34:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+; CHECK-NEXT:    [[B35:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+; CHECK-NEXT:    [[B36:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+; CHECK-NEXT:    [[B37:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+; CHECK-NEXT:    [[B38:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+; CHECK-NEXT:    [[B39:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+; CHECK-NEXT:    [[B40:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+; CHECK-NEXT:    [[B41:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+; CHECK-NEXT:    [[B42:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+; CHECK-NEXT:    [[B43:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+; CHECK-NEXT:    [[B44:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+; CHECK-NEXT:    [[B45:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+; CHECK-NEXT:    [[B46:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+; CHECK-NEXT:    [[B47:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+; CHECK-NEXT:    [[B48:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+; CHECK-NEXT:    [[B49:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+; CHECK-NEXT:    [[B50:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+; CHECK-NEXT:    [[B51:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+; CHECK-NEXT:    [[B52:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+; CHECK-NEXT:    [[B53:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+; CHECK-NEXT:    [[B54:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+; CHECK-NEXT:    [[B55:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+; CHECK-NEXT:    [[B56:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+; CHECK-NEXT:    [[B57:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+; CHECK-NEXT:    [[B58:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+; CHECK-NEXT:    [[B59:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+; CHECK-NEXT:    [[B60:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+; CHECK-NEXT:    [[B61:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+; CHECK-NEXT:    [[B62:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+; CHECK-NEXT:    [[B63:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
 ; CHECK-NEXT:    [[C0:%.*]] = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[A0]], i8 [[B0]])
 ; CHECK-NEXT:    [[C1:%.*]] = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[A1]], i8 [[B1]])
 ; CHECK-NEXT:    [[C2:%.*]] = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[A2]], i8 [[B2]])
@@ -864,200 +864,200 @@ define void @add_v64i8() {
 ; CHECK-NEXT:    [[R61:%.*]] = extractvalue { i8, i1 } [[C61]], 0
 ; CHECK-NEXT:    [[R62:%.*]] = extractvalue { i8, i1 } [[C62]], 0
 ; CHECK-NEXT:    [[R63:%.*]] = extractvalue { i8, i1 } [[C63]], 0
-; CHECK-NEXT:    store i8 [[R0]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0), align 1
-; CHECK-NEXT:    store i8 [[R1]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1), align 1
-; CHECK-NEXT:    store i8 [[R2]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2), align 1
-; CHECK-NEXT:    store i8 [[R3]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3), align 1
-; CHECK-NEXT:    store i8 [[R4]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4), align 1
-; CHECK-NEXT:    store i8 [[R5]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5), align 1
-; CHECK-NEXT:    store i8 [[R6]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6), align 1
-; CHECK-NEXT:    store i8 [[R7]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7), align 1
-; CHECK-NEXT:    store i8 [[R8]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8), align 1
-; CHECK-NEXT:    store i8 [[R9]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9), align 1
-; CHECK-NEXT:    store i8 [[R10]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-; CHECK-NEXT:    store i8 [[R11]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-; CHECK-NEXT:    store i8 [[R12]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-; CHECK-NEXT:    store i8 [[R13]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-; CHECK-NEXT:    store i8 [[R14]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-; CHECK-NEXT:    store i8 [[R15]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-; CHECK-NEXT:    store i8 [[R16]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-; CHECK-NEXT:    store i8 [[R17]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-; CHECK-NEXT:    store i8 [[R18]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-; CHECK-NEXT:    store i8 [[R19]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-; CHECK-NEXT:    store i8 [[R20]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-; CHECK-NEXT:    store i8 [[R21]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-; CHECK-NEXT:    store i8 [[R22]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-; CHECK-NEXT:    store i8 [[R23]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-; CHECK-NEXT:    store i8 [[R24]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-; CHECK-NEXT:    store i8 [[R25]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-; CHECK-NEXT:    store i8 [[R26]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-; CHECK-NEXT:    store i8 [[R27]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-; CHECK-NEXT:    store i8 [[R28]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-; CHECK-NEXT:    store i8 [[R29]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-; CHECK-NEXT:    store i8 [[R30]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-; CHECK-NEXT:    store i8 [[R31]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-; CHECK-NEXT:    store i8 [[R32]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-; CHECK-NEXT:    store i8 [[R33]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-; CHECK-NEXT:    store i8 [[R34]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-; CHECK-NEXT:    store i8 [[R35]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-; CHECK-NEXT:    store i8 [[R36]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-; CHECK-NEXT:    store i8 [[R37]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-; CHECK-NEXT:    store i8 [[R38]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-; CHECK-NEXT:    store i8 [[R39]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-; CHECK-NEXT:    store i8 [[R40]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-; CHECK-NEXT:    store i8 [[R41]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-; CHECK-NEXT:    store i8 [[R42]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-; CHECK-NEXT:    store i8 [[R43]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-; CHECK-NEXT:    store i8 [[R44]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-; CHECK-NEXT:    store i8 [[R45]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-; CHECK-NEXT:    store i8 [[R46]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-; CHECK-NEXT:    store i8 [[R47]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-; CHECK-NEXT:    store i8 [[R48]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-; CHECK-NEXT:    store i8 [[R49]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-; CHECK-NEXT:    store i8 [[R50]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-; CHECK-NEXT:    store i8 [[R51]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-; CHECK-NEXT:    store i8 [[R52]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-; CHECK-NEXT:    store i8 [[R53]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-; CHECK-NEXT:    store i8 [[R54]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-; CHECK-NEXT:    store i8 [[R55]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-; CHECK-NEXT:    store i8 [[R56]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-; CHECK-NEXT:    store i8 [[R57]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-; CHECK-NEXT:    store i8 [[R58]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-; CHECK-NEXT:    store i8 [[R59]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-; CHECK-NEXT:    store i8 [[R60]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-; CHECK-NEXT:    store i8 [[R61]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-; CHECK-NEXT:    store i8 [[R62]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-; CHECK-NEXT:    store i8 [[R63]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+; CHECK-NEXT:    store i8 [[R0]], ptr @c8, align 1
+; CHECK-NEXT:    store i8 [[R1]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1), align 1
+; CHECK-NEXT:    store i8 [[R2]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2), align 1
+; CHECK-NEXT:    store i8 [[R3]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3), align 1
+; CHECK-NEXT:    store i8 [[R4]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4), align 1
+; CHECK-NEXT:    store i8 [[R5]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5), align 1
+; CHECK-NEXT:    store i8 [[R6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6), align 1
+; CHECK-NEXT:    store i8 [[R7]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7), align 1
+; CHECK-NEXT:    store i8 [[R8]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8), align 1
+; CHECK-NEXT:    store i8 [[R9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9), align 1
+; CHECK-NEXT:    store i8 [[R10]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+; CHECK-NEXT:    store i8 [[R11]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+; CHECK-NEXT:    store i8 [[R12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+; CHECK-NEXT:    store i8 [[R13]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+; CHECK-NEXT:    store i8 [[R14]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+; CHECK-NEXT:    store i8 [[R15]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+; CHECK-NEXT:    store i8 [[R16]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; CHECK-NEXT:    store i8 [[R17]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+; CHECK-NEXT:    store i8 [[R18]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+; CHECK-NEXT:    store i8 [[R19]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+; CHECK-NEXT:    store i8 [[R20]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+; CHECK-NEXT:    store i8 [[R21]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+; CHECK-NEXT:    store i8 [[R22]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+; CHECK-NEXT:    store i8 [[R23]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+; CHECK-NEXT:    store i8 [[R24]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+; CHECK-NEXT:    store i8 [[R25]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+; CHECK-NEXT:    store i8 [[R26]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+; CHECK-NEXT:    store i8 [[R27]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+; CHECK-NEXT:    store i8 [[R28]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+; CHECK-NEXT:    store i8 [[R29]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+; CHECK-NEXT:    store i8 [[R30]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+; CHECK-NEXT:    store i8 [[R31]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+; CHECK-NEXT:    store i8 [[R32]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; CHECK-NEXT:    store i8 [[R33]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+; CHECK-NEXT:    store i8 [[R34]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+; CHECK-NEXT:    store i8 [[R35]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+; CHECK-NEXT:    store i8 [[R36]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+; CHECK-NEXT:    store i8 [[R37]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+; CHECK-NEXT:    store i8 [[R38]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+; CHECK-NEXT:    store i8 [[R39]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+; CHECK-NEXT:    store i8 [[R40]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+; CHECK-NEXT:    store i8 [[R41]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+; CHECK-NEXT:    store i8 [[R42]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+; CHECK-NEXT:    store i8 [[R43]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+; CHECK-NEXT:    store i8 [[R44]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+; CHECK-NEXT:    store i8 [[R45]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+; CHECK-NEXT:    store i8 [[R46]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+; CHECK-NEXT:    store i8 [[R47]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+; CHECK-NEXT:    store i8 [[R48]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+; CHECK-NEXT:    store i8 [[R49]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+; CHECK-NEXT:    store i8 [[R50]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+; CHECK-NEXT:    store i8 [[R51]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+; CHECK-NEXT:    store i8 [[R52]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+; CHECK-NEXT:    store i8 [[R53]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+; CHECK-NEXT:    store i8 [[R54]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+; CHECK-NEXT:    store i8 [[R55]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+; CHECK-NEXT:    store i8 [[R56]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+; CHECK-NEXT:    store i8 [[R57]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+; CHECK-NEXT:    store i8 [[R58]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+; CHECK-NEXT:    store i8 [[R59]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+; CHECK-NEXT:    store i8 [[R60]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+; CHECK-NEXT:    store i8 [[R61]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+; CHECK-NEXT:    store i8 [[R62]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+; CHECK-NEXT:    store i8 [[R63]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
 ; CHECK-NEXT:    ret void
 ;
-  %a0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
-  %a1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
-  %a2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
-  %a3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
-  %a4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
-  %a5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
-  %a6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
-  %a7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
-  %a8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
-  %a9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
-  %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-  %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-  %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-  %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-  %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-  %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-  %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-  %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-  %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-  %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-  %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-  %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-  %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-  %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-  %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-  %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-  %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-  %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-  %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-  %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-  %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-  %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-  %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-  %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-  %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-  %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-  %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-  %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-  %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-  %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-  %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-  %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-  %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-  %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-  %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-  %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-  %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-  %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-  %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-  %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-  %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-  %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-  %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-  %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-  %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-  %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-  %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-  %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-  %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-  %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-  %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-  %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-  %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-  %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-  %b0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
-  %b1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
-  %b2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
-  %b3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
-  %b4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
-  %b5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
-  %b6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
-  %b7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
-  %b8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
-  %b9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
-  %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-  %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-  %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-  %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-  %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-  %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-  %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-  %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-  %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-  %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-  %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-  %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-  %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-  %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-  %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-  %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-  %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-  %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-  %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-  %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-  %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-  %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-  %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-  %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-  %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-  %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-  %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-  %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-  %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-  %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-  %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-  %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-  %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-  %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-  %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-  %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-  %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-  %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-  %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-  %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-  %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-  %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-  %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-  %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-  %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-  %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-  %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-  %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-  %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-  %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-  %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-  %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-  %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-  %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+  %a0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+  %a1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+  %a2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+  %a3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+  %a4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+  %a5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+  %a6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+  %a7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+  %a8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+  %a9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+  %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+  %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+  %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+  %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+  %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+  %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+  %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+  %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+  %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+  %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+  %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+  %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+  %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+  %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+  %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+  %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+  %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+  %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+  %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+  %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+  %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+  %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+  %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+  %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+  %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+  %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+  %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+  %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+  %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+  %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+  %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+  %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+  %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+  %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+  %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+  %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+  %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+  %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+  %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+  %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+  %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+  %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+  %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+  %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+  %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+  %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+  %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+  %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+  %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+  %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+  %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+  %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+  %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+  %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+  %b0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+  %b1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+  %b2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+  %b3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+  %b4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+  %b5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+  %b6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+  %b7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+  %b8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+  %b9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+  %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+  %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+  %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+  %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+  %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+  %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+  %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+  %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+  %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+  %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+  %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+  %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+  %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+  %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+  %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+  %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+  %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+  %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+  %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+  %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+  %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+  %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+  %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+  %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+  %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+  %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+  %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+  %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+  %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+  %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+  %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+  %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+  %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+  %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+  %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+  %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+  %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+  %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+  %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+  %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+  %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+  %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+  %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+  %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+  %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+  %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+  %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+  %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+  %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+  %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+  %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+  %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+  %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+  %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
   %c0  = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 %a0 , i8 %b0 )
   %c1  = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 %a1 , i8 %b1 )
   %c2  = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 %a2 , i8 %b2 )
@@ -1186,69 +1186,69 @@ define void @add_v64i8() {
   %r61 = extractvalue {i8, i1} %c61, 0
   %r62 = extractvalue {i8, i1} %c62, 0
   %r63 = extractvalue {i8, i1} %c63, 0
-  store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
-  store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
-  store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
-  store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
-  store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
-  store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
-  store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
-  store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
-  store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
-  store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
-  store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-  store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-  store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-  store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-  store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-  store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-  store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-  store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-  store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-  store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-  store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-  store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-  store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-  store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-  store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-  store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-  store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-  store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-  store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-  store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-  store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-  store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-  store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-  store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-  store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-  store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-  store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-  store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-  store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-  store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-  store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-  store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-  store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-  store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-  store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-  store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-  store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-  store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-  store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-  store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-  store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-  store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-  store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-  store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-  store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-  store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-  store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-  store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-  store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-  store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-  store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-  store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-  store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-  store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+  store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+  store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+  store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+  store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+  store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+  store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+  store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+  store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+  store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+  store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+  store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+  store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+  store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+  store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+  store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+  store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+  store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+  store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+  store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+  store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+  store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+  store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+  store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+  store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+  store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+  store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+  store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+  store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+  store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+  store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+  store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+  store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+  store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+  store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+  store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+  store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+  store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+  store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+  store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+  store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+  store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+  store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+  store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+  store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+  store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+  store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+  store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+  store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+  store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+  store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+  store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+  store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+  store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+  store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+  store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+  store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+  store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+  store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+  store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+  store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+  store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+  store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+  store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+  store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
   ret void
 }

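(Note on the hunks below: they all apply the same mechanical rewrite as the rest of this NFC conversion. With opaque pointers, typed pointers such as i64* and [8 x i64]* collapse into the single ptr type, so the bitcast constant expressions between pointer types disappear, and an all-zero-index getelementptr constant can fold down to the base global itself, as in the @a64 and @c64 accesses below; some test bodies keep the explicit zero-index form. A minimal before/after sketch, using a hypothetical global @g rather than one from the tests:

  ; typed pointers (before): pointee type spelled out, bitcast needed for vector loads
  %v = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @g, i32 0, i64 0), align 8
  %w = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @g to <2 x i64>*), align 8

  ; opaque pointers (after): both accesses go straight through ptr @g
  %v = load i64, ptr @g, align 8
  %w = load <2 x i64>, ptr @g, align 8

Non-zero offsets keep their getelementptr form, e.g. getelementptr inbounds ([8 x i64], ptr @g, i32 0, i64 2), since only the pointer types change.)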
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll
index 87dd1e1d568e1..24c5fcb068086 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll
@@ -27,22 +27,22 @@ declare i8  @llvm.sadd.sat.i8 (i8 , i8 )
 
 define void @add_v8i64() {
 ; SSE-LABEL: @add_v8i64(
-; SSE-NEXT:    [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-; SSE-NEXT:    [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-; SSE-NEXT:    [[A2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-; SSE-NEXT:    [[A3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-; SSE-NEXT:    [[A4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-; SSE-NEXT:    [[A5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-; SSE-NEXT:    [[A6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-; SSE-NEXT:    [[A7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-; SSE-NEXT:    [[B0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-; SSE-NEXT:    [[B1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-; SSE-NEXT:    [[B2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-; SSE-NEXT:    [[B3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-; SSE-NEXT:    [[B4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-; SSE-NEXT:    [[B5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-; SSE-NEXT:    [[B6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-; SSE-NEXT:    [[B7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+; SSE-NEXT:    [[A0:%.*]] = load i64, ptr @a64, align 8
+; SSE-NEXT:    [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+; SSE-NEXT:    [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+; SSE-NEXT:    [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+; SSE-NEXT:    [[B0:%.*]] = load i64, ptr @b64, align 8
+; SSE-NEXT:    [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+; SSE-NEXT:    [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+; SSE-NEXT:    [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
 ; SSE-NEXT:    [[R0:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A0]], i64 [[B0]])
 ; SSE-NEXT:    [[R1:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A1]], i64 [[B1]])
 ; SSE-NEXT:    [[R2:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A2]], i64 [[B2]])
@@ -51,33 +51,33 @@ define void @add_v8i64() {
 ; SSE-NEXT:    [[R5:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A5]], i64 [[B5]])
 ; SSE-NEXT:    [[R6:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A6]], i64 [[B6]])
 ; SSE-NEXT:    [[R7:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A7]], i64 [[B7]])
-; SSE-NEXT:    store i64 [[R0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-; SSE-NEXT:    store i64 [[R1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-; SSE-NEXT:    store i64 [[R2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-; SSE-NEXT:    store i64 [[R3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-; SSE-NEXT:    store i64 [[R4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-; SSE-NEXT:    store i64 [[R5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-; SSE-NEXT:    store i64 [[R6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-; SSE-NEXT:    store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+; SSE-NEXT:    store i64 [[R0]], ptr @c64, align 8
+; SSE-NEXT:    store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+; SSE-NEXT:    store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SSE-NEXT:    store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+; SSE-NEXT:    store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SSE-NEXT:    store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+; SSE-NEXT:    store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+; SSE-NEXT:    store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @add_v8i64(
-; SLM-NEXT:    [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-; SLM-NEXT:    [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-; SLM-NEXT:    [[A2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-; SLM-NEXT:    [[A3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-; SLM-NEXT:    [[A4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-; SLM-NEXT:    [[A5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-; SLM-NEXT:    [[A6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-; SLM-NEXT:    [[A7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-; SLM-NEXT:    [[B0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-; SLM-NEXT:    [[B1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-; SLM-NEXT:    [[B2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-; SLM-NEXT:    [[B3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-; SLM-NEXT:    [[B4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-; SLM-NEXT:    [[B5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-; SLM-NEXT:    [[B6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-; SLM-NEXT:    [[B7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+; SLM-NEXT:    [[A0:%.*]] = load i64, ptr @a64, align 8
+; SLM-NEXT:    [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+; SLM-NEXT:    [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SLM-NEXT:    [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+; SLM-NEXT:    [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SLM-NEXT:    [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+; SLM-NEXT:    [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SLM-NEXT:    [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+; SLM-NEXT:    [[B0:%.*]] = load i64, ptr @b64, align 8
+; SLM-NEXT:    [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+; SLM-NEXT:    [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+; SLM-NEXT:    [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+; SLM-NEXT:    [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+; SLM-NEXT:    [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+; SLM-NEXT:    [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+; SLM-NEXT:    [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
 ; SLM-NEXT:    [[R0:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A0]], i64 [[B0]])
 ; SLM-NEXT:    [[R1:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A1]], i64 [[B1]])
 ; SLM-NEXT:    [[R2:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A2]], i64 [[B2]])
@@ -86,79 +86,79 @@ define void @add_v8i64() {
 ; SLM-NEXT:    [[R5:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A5]], i64 [[B5]])
 ; SLM-NEXT:    [[R6:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A6]], i64 [[B6]])
 ; SLM-NEXT:    [[R7:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A7]], i64 [[B7]])
-; SLM-NEXT:    store i64 [[R0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-; SLM-NEXT:    store i64 [[R1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-; SLM-NEXT:    store i64 [[R2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-; SLM-NEXT:    store i64 [[R3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-; SLM-NEXT:    store i64 [[R4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-; SLM-NEXT:    store i64 [[R5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-; SLM-NEXT:    store i64 [[R6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-; SLM-NEXT:    store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+; SLM-NEXT:    store i64 [[R0]], ptr @c64, align 8
+; SLM-NEXT:    store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+; SLM-NEXT:    store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SLM-NEXT:    store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+; SLM-NEXT:    store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SLM-NEXT:    store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+; SLM-NEXT:    store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+; SLM-NEXT:    store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
 ; SLM-NEXT:    ret void
 ;
 ; AVX1-LABEL: @add_v8i64(
-; AVX1-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
+; AVX1-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
+; AVX1-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
 ; AVX1-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
-; AVX1-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
+; AVX1-NEXT:    store <2 x i64> [[TMP3]], ptr @c64, align 8
+; AVX1-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; AVX1-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
 ; AVX1-NEXT:    [[TMP6:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]])
-; AVX1-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
+; AVX1-NEXT:    store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; AVX1-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX1-NEXT:    [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX1-NEXT:    [[TMP9:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]])
-; AVX1-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP11:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
+; AVX1-NEXT:    store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; AVX1-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; AVX1-NEXT:    [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
 ; AVX1-NEXT:    [[TMP12:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]])
-; AVX1-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
+; AVX1-NEXT:    store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @add_v8i64(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
-; AVX2-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; AVX2-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
 ; AVX2-NEXT:    [[TMP3:%.*]] = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]])
-; AVX2-NEXT:    store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
-; AVX2-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
-; AVX2-NEXT:    [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX2-NEXT:    store <4 x i64> [[TMP3]], ptr @c64, align 8
+; AVX2-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX2-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX2-NEXT:    [[TMP6:%.*]] = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]])
-; AVX2-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX2-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @add_v8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
+; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]])
-; AVX512-NEXT:    store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
+; AVX512-NEXT:    store <8 x i64> [[TMP3]], ptr @c64, align 8
 ; AVX512-NEXT:    ret void
 ;
 ; AVX256BW-LABEL: @add_v8i64(
-; AVX256BW-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
-; AVX256BW-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
-; AVX256BW-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
-; AVX256BW-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX256BW-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; AVX256BW-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX256BW-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr @b64, align 8
+; AVX256BW-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX256BW-NEXT:    [[TMP5:%.*]] = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP3]])
 ; AVX256BW-NEXT:    [[TMP6:%.*]] = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> [[TMP2]], <4 x i64> [[TMP4]])
-; AVX256BW-NEXT:    store <4 x i64> [[TMP5]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
-; AVX256BW-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX256BW-NEXT:    store <4 x i64> [[TMP5]], ptr @c64, align 8
+; AVX256BW-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
 ; AVX256BW-NEXT:    ret void
-  %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-  %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-  %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-  %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-  %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-  %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-  %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-  %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-  %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-  %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-  %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-  %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-  %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-  %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-  %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-  %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+  %a0 = load i64, ptr @a64, align 8
+  %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+  %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+  %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+  %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+  %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+  %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+  %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+  %b0 = load i64, ptr @b64, align 8
+  %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+  %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+  %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+  %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+  %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+  %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+  %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
   %r0 = call i64 @llvm.sadd.sat.i64(i64 %a0, i64 %b0)
   %r1 = call i64 @llvm.sadd.sat.i64(i64 %a1, i64 %b1)
   %r2 = call i64 @llvm.sadd.sat.i64(i64 %a2, i64 %b2)
@@ -167,106 +167,106 @@ define void @add_v8i64() {
   %r5 = call i64 @llvm.sadd.sat.i64(i64 %a5, i64 %b5)
   %r6 = call i64 @llvm.sadd.sat.i64(i64 %a6, i64 %b6)
   %r7 = call i64 @llvm.sadd.sat.i64(i64 %a7, i64 %b7)
-  store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-  store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-  store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-  store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-  store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-  store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-  store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-  store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+  store i64 %r0, ptr @c64, align 8
+  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @add_v16i32() {
 ; SSE-LABEL: @add_v16i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-; SSE-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP3]], ptr @c32, align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]])
-; SSE-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP9:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]])
-; SSE-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP12:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]])
-; SSE-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @add_v16i32(
-; SLM-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
+; SLM-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
+; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
 ; SLM-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-; SLM-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP3]], ptr @c32, align 4
+; SLM-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SLM-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
 ; SLM-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]])
-; SLM-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SLM-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SLM-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; SLM-NEXT:    [[TMP9:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]])
-; SLM-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SLM-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SLM-NEXT:    [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
 ; SLM-NEXT:    [[TMP12:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]])
-; SLM-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @add_v16i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
 ; AVX-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
-; AVX-NEXT:    store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP3]], ptr @c32, align 4
+; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; AVX-NEXT:    [[TMP6:%.*]] = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> [[TMP4]], <8 x i32> [[TMP5]])
-; AVX-NEXT:    store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @add_v16i32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @b32 to <16 x i32>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
-; AVX512-NEXT:    store <16 x i32> [[TMP3]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4
+; AVX512-NEXT:    store <16 x i32> [[TMP3]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-  %b0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
-  %b1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
-  %b2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
-  %b3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
-  %b4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
-  %b5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
-  %b6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
-  %b7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
-  %b8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
-  %b9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
-  %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-  %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-  %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-  %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-  %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-  %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+  %b0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+  %b1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+  %b2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+  %b3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+  %b4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+  %b5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+  %b6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+  %b7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+  %b8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+  %b9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+  %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+  %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+  %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+  %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+  %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+  %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
   %r0  = call i32 @llvm.sadd.sat.i32(i32 %a0 , i32 %b0 )
   %r1  = call i32 @llvm.sadd.sat.i32(i32 %a1 , i32 %b1 )
   %r2  = call i32 @llvm.sadd.sat.i32(i32 %a2 , i32 %b2 )
@@ -283,146 +283,146 @@ define void @add_v16i32() {
   %r13 = call i32 @llvm.sadd.sat.i32(i32 %a13, i32 %b13)
   %r14 = call i32 @llvm.sadd.sat.i32(i32 %a14, i32 %b14)
   %r15 = call i32 @llvm.sadd.sat.i32(i32 %a15, i32 %b15)
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @add_v32i16() {
 ; SSE-LABEL: @add_v32i16(
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
 ; SSE-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-; SSE-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
 ; SSE-NEXT:    [[TMP6:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]])
-; SSE-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; SSE-NEXT:    [[TMP9:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]])
-; SSE-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
 ; SSE-NEXT:    [[TMP12:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]])
-; SSE-NEXT:    store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @add_v32i16(
-; SLM-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SLM-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
 ; SLM-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-; SLM-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SLM-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SLM-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
 ; SLM-NEXT:    [[TMP6:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]])
-; SLM-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SLM-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SLM-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; SLM-NEXT:    [[TMP9:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]])
-; SLM-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SLM-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SLM-NEXT:    [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
 ; SLM-NEXT:    [[TMP12:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]])
-; SLM-NEXT:    store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @add_v32i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
+; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
 ; AVX-NEXT:    [[TMP3:%.*]] = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
-; AVX-NEXT:    store <16 x i16> [[TMP3]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP3]], ptr @c16, align 2
+; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; AVX-NEXT:    [[TMP6:%.*]] = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> [[TMP4]], <16 x i16> [[TMP5]])
-; AVX-NEXT:    store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @add_v32i16(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @a16 to <32 x i16>*), align 2
-; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @b16 to <32 x i16>*), align 2
+; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
+; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
-; AVX512-NEXT:    store <32 x i16> [[TMP3]], <32 x i16>* bitcast ([32 x i16]* @c16 to <32 x i16>*), align 2
+; AVX512-NEXT:    store <32 x i16> [[TMP3]], ptr @c16, align 2
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
-  %a1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
-  %a2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
-  %a3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
-  %a4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
-  %a5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
-  %a6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
-  %a7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
-  %a8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
-  %a9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
-  %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-  %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-  %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-  %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-  %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-  %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-  %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-  %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-  %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-  %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-  %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-  %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-  %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-  %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-  %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-  %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-  %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-  %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-  %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-  %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-  %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-  %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-  %b0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
-  %b1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
-  %b2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
-  %b3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
-  %b4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
-  %b5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
-  %b6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
-  %b7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
-  %b8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
-  %b9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
-  %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-  %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-  %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-  %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-  %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-  %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-  %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-  %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-  %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-  %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-  %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-  %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-  %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-  %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-  %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-  %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-  %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-  %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-  %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-  %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-  %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-  %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+  %a0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+  %a1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+  %a2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+  %a3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+  %a4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+  %a5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+  %a6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+  %a7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+  %a8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+  %a9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+  %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+  %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+  %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+  %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+  %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+  %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+  %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+  %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+  %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+  %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+  %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+  %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+  %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+  %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+  %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+  %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+  %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+  %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+  %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+  %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+  %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+  %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+  %b0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+  %b1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+  %b2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+  %b3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+  %b4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+  %b5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+  %b6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+  %b7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+  %b8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+  %b9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+  %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+  %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+  %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+  %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+  %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+  %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+  %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+  %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+  %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+  %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+  %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+  %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+  %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+  %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+  %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+  %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+  %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+  %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+  %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+  %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+  %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+  %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
   %r0  = call i16 @llvm.sadd.sat.i16(i16 %a0 , i16 %b0 )
   %r1  = call i16 @llvm.sadd.sat.i16(i16 %a1 , i16 %b1 )
   %r2  = call i16 @llvm.sadd.sat.i16(i16 %a2 , i16 %b2 )
@@ -455,226 +455,226 @@ define void @add_v32i16() {
   %r29 = call i16 @llvm.sadd.sat.i16(i16 %a29, i16 %b29)
   %r30 = call i16 @llvm.sadd.sat.i16(i16 %a30, i16 %b30)
   %r31 = call i16 @llvm.sadd.sat.i16(i16 %a31, i16 %b31)
-  store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
-  store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
-  store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
-  store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
-  store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
-  store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
-  store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
-  store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
-  store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
-  store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
-  store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-  store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-  store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-  store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-  store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-  store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-  store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-  store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-  store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-  store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-  store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-  store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-  store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-  store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-  store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-  store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-  store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-  store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-  store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-  store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-  store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-  store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+  store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+  store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+  store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+  store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+  store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+  store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+  store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+  store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+  store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+  store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+  store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+  store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+  store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+  store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+  store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+  store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+  store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+  store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+  store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+  store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+  store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+  store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+  store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+  store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+  store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+  store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+  store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+  store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+  store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+  store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+  store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+  store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
   ret void
 }
 
 define void @add_v64i8() {
 ; SSE-LABEL: @add_v64i8(
-; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-; SSE-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
 ; SSE-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
-; SSE-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; SSE-NEXT:    [[TMP9:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
-; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
 ; SSE-NEXT:    [[TMP12:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]])
-; SSE-NEXT:    store <16 x i8> [[TMP9]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @add_v64i8(
-; SLM-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
+; SLM-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-; SLM-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SLM-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SLM-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
 ; SLM-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
-; SLM-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SLM-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SLM-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; SLM-NEXT:    [[TMP9:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
-; SLM-NEXT:    [[TMP10:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
+; SLM-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SLM-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
 ; SLM-NEXT:    [[TMP12:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]])
-; SLM-NEXT:    store <16 x i8> [[TMP9]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SLM-NEXT:    store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @add_v64i8(
-; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @a8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @b8 to <32 x i8>*), align 1
+; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> [[TMP1]], <32 x i8> [[TMP2]])
-; AVX-NEXT:    store <32 x i8> [[TMP3]], <32 x i8>* bitcast ([64 x i8]* @c8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP3]], ptr @c8, align 1
+; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; AVX-NEXT:    [[TMP6:%.*]] = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> [[TMP4]], <32 x i8> [[TMP5]])
-; AVX-NEXT:    store <32 x i8> [[TMP6]], <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @add_v64i8(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @a8 to <64 x i8>*), align 1
-; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @b8 to <64 x i8>*), align 1
+; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
+; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> [[TMP1]], <64 x i8> [[TMP2]])
-; AVX512-NEXT:    store <64 x i8> [[TMP3]], <64 x i8>* bitcast ([64 x i8]* @c8 to <64 x i8>*), align 1
+; AVX512-NEXT:    store <64 x i8> [[TMP3]], ptr @c8, align 1
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
-  %a1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
-  %a2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
-  %a3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
-  %a4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
-  %a5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
-  %a6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
-  %a7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
-  %a8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
-  %a9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
-  %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-  %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-  %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-  %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-  %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-  %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-  %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-  %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-  %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-  %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-  %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-  %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-  %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-  %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-  %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-  %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-  %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-  %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-  %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-  %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-  %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-  %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-  %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-  %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-  %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-  %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-  %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-  %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-  %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-  %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-  %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-  %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-  %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-  %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-  %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-  %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-  %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-  %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-  %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-  %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-  %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-  %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-  %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-  %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-  %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-  %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-  %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-  %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-  %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-  %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-  %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-  %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-  %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-  %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-  %b0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
-  %b1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
-  %b2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
-  %b3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
-  %b4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
-  %b5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
-  %b6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
-  %b7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
-  %b8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
-  %b9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
-  %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-  %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-  %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-  %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-  %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-  %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-  %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-  %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-  %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-  %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-  %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-  %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-  %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-  %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-  %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-  %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-  %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-  %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-  %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-  %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-  %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-  %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-  %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-  %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-  %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-  %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-  %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-  %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-  %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-  %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-  %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-  %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-  %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-  %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-  %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-  %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-  %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-  %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-  %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-  %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-  %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-  %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-  %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-  %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-  %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-  %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-  %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-  %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-  %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-  %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-  %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-  %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-  %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-  %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+  %a0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+  %a1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+  %a2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+  %a3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+  %a4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+  %a5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+  %a6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+  %a7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+  %a8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+  %a9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+  %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+  %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+  %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+  %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+  %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+  %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+  %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+  %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+  %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+  %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+  %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+  %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+  %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+  %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+  %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+  %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+  %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+  %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+  %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+  %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+  %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+  %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+  %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+  %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+  %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+  %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+  %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+  %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+  %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+  %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+  %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+  %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+  %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+  %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+  %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+  %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+  %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+  %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+  %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+  %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+  %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+  %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+  %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+  %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+  %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+  %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+  %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+  %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+  %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+  %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+  %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+  %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+  %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+  %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+  %b0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+  %b1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+  %b2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+  %b3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+  %b4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+  %b5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+  %b6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+  %b7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+  %b8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+  %b9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+  %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+  %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+  %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+  %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+  %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+  %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+  %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+  %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+  %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+  %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+  %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+  %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+  %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+  %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+  %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+  %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+  %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+  %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+  %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+  %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+  %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+  %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+  %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+  %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+  %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+  %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+  %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+  %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+  %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+  %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+  %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+  %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+  %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+  %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+  %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+  %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+  %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+  %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+  %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+  %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+  %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+  %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+  %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+  %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+  %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+  %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+  %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+  %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+  %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+  %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+  %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+  %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+  %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+  %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
   %r0  = call i8 @llvm.sadd.sat.i8(i8 %a0 , i8 %b0 )
   %r1  = call i8 @llvm.sadd.sat.i8(i8 %a1 , i8 %b1 )
   %r2  = call i8 @llvm.sadd.sat.i8(i8 %a2 , i8 %b2 )
@@ -739,69 +739,69 @@ define void @add_v64i8() {
   %r61 = call i8 @llvm.sadd.sat.i8(i8 %a61, i8 %b61)
   %r62 = call i8 @llvm.sadd.sat.i8(i8 %a62, i8 %b62)
   %r63 = call i8 @llvm.sadd.sat.i8(i8 %a63, i8 %b63)
-  store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
-  store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
-  store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
-  store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
-  store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
-  store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
-  store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
-  store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
-  store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
-  store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
-  store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-  store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-  store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-  store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-  store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-  store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-  store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-  store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-  store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-  store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-  store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-  store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-  store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-  store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-  store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-  store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-  store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-  store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-  store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-  store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-  store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-  store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-  store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-  store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-  store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-  store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-  store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-  store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-  store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-  store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-  store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-  store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-  store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-  store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-  store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-  store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-  store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-  store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-  store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-  store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-  store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-  store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-  store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-  store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-  store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-  store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-  store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-  store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-  store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-  store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-  store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-  store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-  store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-  store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+  store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+  store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+  store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+  store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+  store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+  store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+  store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+  store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+  store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+  store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+  store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+  store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+  store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+  store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+  store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+  store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+  store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+  store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+  store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+  store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+  store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+  store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+  store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+  store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+  store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+  store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+  store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+  store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+  store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+  store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+  store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+  store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+  store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+  store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+  store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+  store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+  store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+  store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+  store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+  store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+  store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+  store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+  store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+  store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+  store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+  store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+  store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+  store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+  store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+  store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+  store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+  store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+  store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+  store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+  store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+  store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+  store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+  store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+  store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+  store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+  store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+  store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+  store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+  store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-uaddo.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-uaddo.ll
index e76594d96c00f..fc67cec60f177 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-uaddo.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-uaddo.ll
@@ -27,22 +27,22 @@ declare {i8 , i1} @llvm.uadd.with.overflow.i8 (i8 , i8 )
 
 define void @add_v8i64() {
 ; CHECK-LABEL: @add_v8i64(
-; CHECK-NEXT:    [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-; CHECK-NEXT:    [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-; CHECK-NEXT:    [[A2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-; CHECK-NEXT:    [[A3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-; CHECK-NEXT:    [[A4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-; CHECK-NEXT:    [[A5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-; CHECK-NEXT:    [[A6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-; CHECK-NEXT:    [[A7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-; CHECK-NEXT:    [[B0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-; CHECK-NEXT:    [[B1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-; CHECK-NEXT:    [[B2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-; CHECK-NEXT:    [[B3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-; CHECK-NEXT:    [[B4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-; CHECK-NEXT:    [[B5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-; CHECK-NEXT:    [[B6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-; CHECK-NEXT:    [[B7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+; CHECK-NEXT:    [[A0:%.*]] = load i64, ptr @a64, align 8
+; CHECK-NEXT:    [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+; CHECK-NEXT:    [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; CHECK-NEXT:    [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+; CHECK-NEXT:    [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; CHECK-NEXT:    [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+; CHECK-NEXT:    [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; CHECK-NEXT:    [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+; CHECK-NEXT:    [[B0:%.*]] = load i64, ptr @b64, align 8
+; CHECK-NEXT:    [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+; CHECK-NEXT:    [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+; CHECK-NEXT:    [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+; CHECK-NEXT:    [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+; CHECK-NEXT:    [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+; CHECK-NEXT:    [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+; CHECK-NEXT:    [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
 ; CHECK-NEXT:    [[C0:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A0]], i64 [[B0]])
 ; CHECK-NEXT:    [[C1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A1]], i64 [[B1]])
 ; CHECK-NEXT:    [[C2:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A2]], i64 [[B2]])
@@ -59,32 +59,32 @@ define void @add_v8i64() {
 ; CHECK-NEXT:    [[R5:%.*]] = extractvalue { i64, i1 } [[C5]], 0
 ; CHECK-NEXT:    [[R6:%.*]] = extractvalue { i64, i1 } [[C6]], 0
 ; CHECK-NEXT:    [[R7:%.*]] = extractvalue { i64, i1 } [[C7]], 0
-; CHECK-NEXT:    store i64 [[R0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-; CHECK-NEXT:    store i64 [[R1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-; CHECK-NEXT:    store i64 [[R2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-; CHECK-NEXT:    store i64 [[R3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-; CHECK-NEXT:    store i64 [[R4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-; CHECK-NEXT:    store i64 [[R5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-; CHECK-NEXT:    store i64 [[R6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-; CHECK-NEXT:    store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+; CHECK-NEXT:    store i64 [[R0]], ptr @c64, align 8
+; CHECK-NEXT:    store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+; CHECK-NEXT:    store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; CHECK-NEXT:    store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+; CHECK-NEXT:    store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; CHECK-NEXT:    store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+; CHECK-NEXT:    store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+; CHECK-NEXT:    store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-  %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-  %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-  %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-  %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-  %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-  %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-  %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-  %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-  %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-  %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-  %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-  %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-  %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-  %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-  %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+  %a0 = load i64, ptr @a64, align 8
+  %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+  %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+  %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+  %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+  %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+  %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+  %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+  %b0 = load i64, ptr @b64, align 8
+  %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+  %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+  %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+  %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+  %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+  %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+  %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
   %c0 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a0, i64 %b0)
   %c1 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a1, i64 %b1)
   %c2 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a2, i64 %b2)
@@ -101,51 +101,51 @@ define void @add_v8i64() {
   %r5 = extractvalue {i64, i1} %c5, 0
   %r6 = extractvalue {i64, i1} %c6, 0
   %r7 = extractvalue {i64, i1} %c7, 0
-  store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-  store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-  store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-  store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-  store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-  store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-  store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-  store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+  store i64 %r0, ptr @c64, align 8
+  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @add_v16i32() {
 ; CHECK-LABEL: @add_v16i32(
-; CHECK-NEXT:    [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4
-; CHECK-NEXT:    [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4
-; CHECK-NEXT:    [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4
-; CHECK-NEXT:    [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4
-; CHECK-NEXT:    [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4
-; CHECK-NEXT:    [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4
-; CHECK-NEXT:    [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4
-; CHECK-NEXT:    [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4
-; CHECK-NEXT:    [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4
-; CHECK-NEXT:    [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4
-; CHECK-NEXT:    [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-; CHECK-NEXT:    [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-; CHECK-NEXT:    [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-; CHECK-NEXT:    [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-; CHECK-NEXT:    [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-; CHECK-NEXT:    [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-; CHECK-NEXT:    [[B0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0), align 4
-; CHECK-NEXT:    [[B1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1), align 4
-; CHECK-NEXT:    [[B2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2), align 4
-; CHECK-NEXT:    [[B3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3), align 4
-; CHECK-NEXT:    [[B4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4), align 4
-; CHECK-NEXT:    [[B5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5), align 4
-; CHECK-NEXT:    [[B6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6), align 4
-; CHECK-NEXT:    [[B7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7), align 4
-; CHECK-NEXT:    [[B8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8), align 4
-; CHECK-NEXT:    [[B9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9), align 4
-; CHECK-NEXT:    [[B10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-; CHECK-NEXT:    [[B11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-; CHECK-NEXT:    [[B12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-; CHECK-NEXT:    [[B13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-; CHECK-NEXT:    [[B14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-; CHECK-NEXT:    [[B15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+; CHECK-NEXT:    [[A0:%.*]] = load i32, ptr @a32, align 4
+; CHECK-NEXT:    [[A1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[A2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[A3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3), align 4
+; CHECK-NEXT:    [[A4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; CHECK-NEXT:    [[A5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5), align 4
+; CHECK-NEXT:    [[A6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6), align 4
+; CHECK-NEXT:    [[A7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7), align 4
+; CHECK-NEXT:    [[A8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; CHECK-NEXT:    [[A9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9), align 4
+; CHECK-NEXT:    [[A10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+; CHECK-NEXT:    [[A11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+; CHECK-NEXT:    [[A12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; CHECK-NEXT:    [[A13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+; CHECK-NEXT:    [[A14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+; CHECK-NEXT:    [[A15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+; CHECK-NEXT:    [[B0:%.*]] = load i32, ptr @b32, align 4
+; CHECK-NEXT:    [[B1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[B2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[B3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3), align 4
+; CHECK-NEXT:    [[B4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
+; CHECK-NEXT:    [[B5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5), align 4
+; CHECK-NEXT:    [[B6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6), align 4
+; CHECK-NEXT:    [[B7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7), align 4
+; CHECK-NEXT:    [[B8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
+; CHECK-NEXT:    [[B9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9), align 4
+; CHECK-NEXT:    [[B10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+; CHECK-NEXT:    [[B11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+; CHECK-NEXT:    [[B12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+; CHECK-NEXT:    [[B13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+; CHECK-NEXT:    [[B14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+; CHECK-NEXT:    [[B15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
 ; CHECK-NEXT:    [[C0:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[A0]], i32 [[B0]])
 ; CHECK-NEXT:    [[C1:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[A1]], i32 [[B1]])
 ; CHECK-NEXT:    [[C2:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[A2]], i32 [[B2]])
@@ -178,56 +178,56 @@ define void @add_v16i32() {
 ; CHECK-NEXT:    [[R13:%.*]] = extractvalue { i32, i1 } [[C13]], 0
 ; CHECK-NEXT:    [[R14:%.*]] = extractvalue { i32, i1 } [[C14]], 0
 ; CHECK-NEXT:    [[R15:%.*]] = extractvalue { i32, i1 } [[C15]], 0
-; CHECK-NEXT:    store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4
-; CHECK-NEXT:    store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4
-; CHECK-NEXT:    store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4
-; CHECK-NEXT:    store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4
-; CHECK-NEXT:    store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4
-; CHECK-NEXT:    store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4
-; CHECK-NEXT:    store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4
-; CHECK-NEXT:    store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4
-; CHECK-NEXT:    store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4
-; CHECK-NEXT:    store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4
-; CHECK-NEXT:    store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-; CHECK-NEXT:    store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-; CHECK-NEXT:    store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-; CHECK-NEXT:    store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-; CHECK-NEXT:    store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-; CHECK-NEXT:    store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+; CHECK-NEXT:    store i32 [[R0]], ptr @c32, align 4
+; CHECK-NEXT:    store i32 [[R1]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1), align 4
+; CHECK-NEXT:    store i32 [[R2]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2), align 4
+; CHECK-NEXT:    store i32 [[R3]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3), align 4
+; CHECK-NEXT:    store i32 [[R4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; CHECK-NEXT:    store i32 [[R5]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5), align 4
+; CHECK-NEXT:    store i32 [[R6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6), align 4
+; CHECK-NEXT:    store i32 [[R7]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7), align 4
+; CHECK-NEXT:    store i32 [[R8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; CHECK-NEXT:    store i32 [[R9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9), align 4
+; CHECK-NEXT:    store i32 [[R10]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+; CHECK-NEXT:    store i32 [[R11]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+; CHECK-NEXT:    store i32 [[R12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+; CHECK-NEXT:    store i32 [[R13]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+; CHECK-NEXT:    store i32 [[R14]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+; CHECK-NEXT:    store i32 [[R15]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
 ; CHECK-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-  %b0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
-  %b1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
-  %b2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
-  %b3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
-  %b4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
-  %b5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
-  %b6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
-  %b7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
-  %b8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
-  %b9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
-  %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-  %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-  %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-  %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-  %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-  %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+  %b0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+  %b1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+  %b2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+  %b3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+  %b4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+  %b5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+  %b6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+  %b7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+  %b8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+  %b9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+  %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+  %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+  %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+  %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+  %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+  %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
   %c0  = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a0 , i32 %b0 )
   %c1  = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a1 , i32 %b1 )
   %c2  = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a2 , i32 %b2 )
@@ -260,91 +260,91 @@ define void @add_v16i32() {
   %r13 = extractvalue {i32, i1} %c13, 0
   %r14 = extractvalue {i32, i1} %c14, 0
   %r15 = extractvalue {i32, i1} %c15, 0
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @add_v32i16() {
 ; CHECK-LABEL: @add_v32i16(
-; CHECK-NEXT:    [[A0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0), align 2
-; CHECK-NEXT:    [[A1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1), align 2
-; CHECK-NEXT:    [[A2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2), align 2
-; CHECK-NEXT:    [[A3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3), align 2
-; CHECK-NEXT:    [[A4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4), align 2
-; CHECK-NEXT:    [[A5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5), align 2
-; CHECK-NEXT:    [[A6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6), align 2
-; CHECK-NEXT:    [[A7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7), align 2
-; CHECK-NEXT:    [[A8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8), align 2
-; CHECK-NEXT:    [[A9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9), align 2
-; CHECK-NEXT:    [[A10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-; CHECK-NEXT:    [[A11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-; CHECK-NEXT:    [[A12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-; CHECK-NEXT:    [[A13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-; CHECK-NEXT:    [[A14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-; CHECK-NEXT:    [[A15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-; CHECK-NEXT:    [[A16:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-; CHECK-NEXT:    [[A17:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-; CHECK-NEXT:    [[A18:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-; CHECK-NEXT:    [[A19:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-; CHECK-NEXT:    [[A20:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-; CHECK-NEXT:    [[A21:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-; CHECK-NEXT:    [[A22:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-; CHECK-NEXT:    [[A23:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-; CHECK-NEXT:    [[A24:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-; CHECK-NEXT:    [[A25:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-; CHECK-NEXT:    [[A26:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-; CHECK-NEXT:    [[A27:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-; CHECK-NEXT:    [[A28:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-; CHECK-NEXT:    [[A29:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-; CHECK-NEXT:    [[A30:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-; CHECK-NEXT:    [[A31:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-; CHECK-NEXT:    [[B0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0), align 2
-; CHECK-NEXT:    [[B1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1), align 2
-; CHECK-NEXT:    [[B2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2), align 2
-; CHECK-NEXT:    [[B3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3), align 2
-; CHECK-NEXT:    [[B4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4), align 2
-; CHECK-NEXT:    [[B5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5), align 2
-; CHECK-NEXT:    [[B6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6), align 2
-; CHECK-NEXT:    [[B7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7), align 2
-; CHECK-NEXT:    [[B8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8), align 2
-; CHECK-NEXT:    [[B9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9), align 2
-; CHECK-NEXT:    [[B10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-; CHECK-NEXT:    [[B11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-; CHECK-NEXT:    [[B12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-; CHECK-NEXT:    [[B13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-; CHECK-NEXT:    [[B14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-; CHECK-NEXT:    [[B15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-; CHECK-NEXT:    [[B16:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-; CHECK-NEXT:    [[B17:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-; CHECK-NEXT:    [[B18:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-; CHECK-NEXT:    [[B19:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-; CHECK-NEXT:    [[B20:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-; CHECK-NEXT:    [[B21:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-; CHECK-NEXT:    [[B22:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-; CHECK-NEXT:    [[B23:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-; CHECK-NEXT:    [[B24:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-; CHECK-NEXT:    [[B25:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-; CHECK-NEXT:    [[B26:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-; CHECK-NEXT:    [[B27:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-; CHECK-NEXT:    [[B28:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-; CHECK-NEXT:    [[B29:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-; CHECK-NEXT:    [[B30:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-; CHECK-NEXT:    [[B31:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+; CHECK-NEXT:    [[A0:%.*]] = load i16, ptr @a16, align 2
+; CHECK-NEXT:    [[A1:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1), align 2
+; CHECK-NEXT:    [[A2:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2), align 2
+; CHECK-NEXT:    [[A3:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3), align 2
+; CHECK-NEXT:    [[A4:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4), align 2
+; CHECK-NEXT:    [[A5:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5), align 2
+; CHECK-NEXT:    [[A6:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6), align 2
+; CHECK-NEXT:    [[A7:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7), align 2
+; CHECK-NEXT:    [[A8:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; CHECK-NEXT:    [[A9:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9), align 2
+; CHECK-NEXT:    [[A10:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+; CHECK-NEXT:    [[A11:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+; CHECK-NEXT:    [[A12:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+; CHECK-NEXT:    [[A13:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+; CHECK-NEXT:    [[A14:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+; CHECK-NEXT:    [[A15:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+; CHECK-NEXT:    [[A16:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; CHECK-NEXT:    [[A17:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+; CHECK-NEXT:    [[A18:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+; CHECK-NEXT:    [[A19:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+; CHECK-NEXT:    [[A20:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+; CHECK-NEXT:    [[A21:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+; CHECK-NEXT:    [[A22:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+; CHECK-NEXT:    [[A23:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+; CHECK-NEXT:    [[A24:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; CHECK-NEXT:    [[A25:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+; CHECK-NEXT:    [[A26:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+; CHECK-NEXT:    [[A27:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+; CHECK-NEXT:    [[A28:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+; CHECK-NEXT:    [[A29:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+; CHECK-NEXT:    [[A30:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+; CHECK-NEXT:    [[A31:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+; CHECK-NEXT:    [[B0:%.*]] = load i16, ptr @b16, align 2
+; CHECK-NEXT:    [[B1:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1), align 2
+; CHECK-NEXT:    [[B2:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2), align 2
+; CHECK-NEXT:    [[B3:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3), align 2
+; CHECK-NEXT:    [[B4:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4), align 2
+; CHECK-NEXT:    [[B5:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5), align 2
+; CHECK-NEXT:    [[B6:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6), align 2
+; CHECK-NEXT:    [[B7:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7), align 2
+; CHECK-NEXT:    [[B8:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
+; CHECK-NEXT:    [[B9:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9), align 2
+; CHECK-NEXT:    [[B10:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+; CHECK-NEXT:    [[B11:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+; CHECK-NEXT:    [[B12:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+; CHECK-NEXT:    [[B13:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+; CHECK-NEXT:    [[B14:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+; CHECK-NEXT:    [[B15:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+; CHECK-NEXT:    [[B16:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+; CHECK-NEXT:    [[B17:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+; CHECK-NEXT:    [[B18:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+; CHECK-NEXT:    [[B19:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+; CHECK-NEXT:    [[B20:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+; CHECK-NEXT:    [[B21:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+; CHECK-NEXT:    [[B22:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+; CHECK-NEXT:    [[B23:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+; CHECK-NEXT:    [[B24:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+; CHECK-NEXT:    [[B25:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+; CHECK-NEXT:    [[B26:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+; CHECK-NEXT:    [[B27:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+; CHECK-NEXT:    [[B28:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+; CHECK-NEXT:    [[B29:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+; CHECK-NEXT:    [[B30:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+; CHECK-NEXT:    [[B31:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
 ; CHECK-NEXT:    [[C0:%.*]] = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 [[A0]], i16 [[B0]])
 ; CHECK-NEXT:    [[C1:%.*]] = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 [[A1]], i16 [[B1]])
 ; CHECK-NEXT:    [[C2:%.*]] = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 [[A2]], i16 [[B2]])
@@ -409,104 +409,104 @@ define void @add_v32i16() {
 ; CHECK-NEXT:    [[R29:%.*]] = extractvalue { i16, i1 } [[C29]], 0
 ; CHECK-NEXT:    [[R30:%.*]] = extractvalue { i16, i1 } [[C30]], 0
 ; CHECK-NEXT:    [[R31:%.*]] = extractvalue { i16, i1 } [[C31]], 0
-; CHECK-NEXT:    store i16 [[R0]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0), align 2
-; CHECK-NEXT:    store i16 [[R1]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1), align 2
-; CHECK-NEXT:    store i16 [[R2]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2), align 2
-; CHECK-NEXT:    store i16 [[R3]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3), align 2
-; CHECK-NEXT:    store i16 [[R4]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4), align 2
-; CHECK-NEXT:    store i16 [[R5]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5), align 2
-; CHECK-NEXT:    store i16 [[R6]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6), align 2
-; CHECK-NEXT:    store i16 [[R7]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7), align 2
-; CHECK-NEXT:    store i16 [[R8]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8), align 2
-; CHECK-NEXT:    store i16 [[R9]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9), align 2
-; CHECK-NEXT:    store i16 [[R10]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-; CHECK-NEXT:    store i16 [[R11]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-; CHECK-NEXT:    store i16 [[R12]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-; CHECK-NEXT:    store i16 [[R13]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-; CHECK-NEXT:    store i16 [[R14]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-; CHECK-NEXT:    store i16 [[R15]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-; CHECK-NEXT:    store i16 [[R16]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-; CHECK-NEXT:    store i16 [[R17]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-; CHECK-NEXT:    store i16 [[R18]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-; CHECK-NEXT:    store i16 [[R19]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-; CHECK-NEXT:    store i16 [[R20]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-; CHECK-NEXT:    store i16 [[R21]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-; CHECK-NEXT:    store i16 [[R22]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-; CHECK-NEXT:    store i16 [[R23]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-; CHECK-NEXT:    store i16 [[R24]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-; CHECK-NEXT:    store i16 [[R25]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-; CHECK-NEXT:    store i16 [[R26]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-; CHECK-NEXT:    store i16 [[R27]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-; CHECK-NEXT:    store i16 [[R28]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-; CHECK-NEXT:    store i16 [[R29]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-; CHECK-NEXT:    store i16 [[R30]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-; CHECK-NEXT:    store i16 [[R31]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+; CHECK-NEXT:    store i16 [[R0]], ptr @c16, align 2
+; CHECK-NEXT:    store i16 [[R1]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1), align 2
+; CHECK-NEXT:    store i16 [[R2]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2), align 2
+; CHECK-NEXT:    store i16 [[R3]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3), align 2
+; CHECK-NEXT:    store i16 [[R4]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4), align 2
+; CHECK-NEXT:    store i16 [[R5]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5), align 2
+; CHECK-NEXT:    store i16 [[R6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6), align 2
+; CHECK-NEXT:    store i16 [[R7]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7), align 2
+; CHECK-NEXT:    store i16 [[R8]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; CHECK-NEXT:    store i16 [[R9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9), align 2
+; CHECK-NEXT:    store i16 [[R10]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+; CHECK-NEXT:    store i16 [[R11]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+; CHECK-NEXT:    store i16 [[R12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+; CHECK-NEXT:    store i16 [[R13]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+; CHECK-NEXT:    store i16 [[R14]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+; CHECK-NEXT:    store i16 [[R15]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+; CHECK-NEXT:    store i16 [[R16]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; CHECK-NEXT:    store i16 [[R17]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+; CHECK-NEXT:    store i16 [[R18]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+; CHECK-NEXT:    store i16 [[R19]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+; CHECK-NEXT:    store i16 [[R20]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+; CHECK-NEXT:    store i16 [[R21]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+; CHECK-NEXT:    store i16 [[R22]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+; CHECK-NEXT:    store i16 [[R23]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+; CHECK-NEXT:    store i16 [[R24]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+; CHECK-NEXT:    store i16 [[R25]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+; CHECK-NEXT:    store i16 [[R26]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+; CHECK-NEXT:    store i16 [[R27]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+; CHECK-NEXT:    store i16 [[R28]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+; CHECK-NEXT:    store i16 [[R29]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+; CHECK-NEXT:    store i16 [[R30]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+; CHECK-NEXT:    store i16 [[R31]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
 ; CHECK-NEXT:    ret void
 ;
-  %a0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
-  %a1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
-  %a2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
-  %a3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
-  %a4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
-  %a5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
-  %a6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
-  %a7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
-  %a8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
-  %a9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
-  %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-  %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-  %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-  %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-  %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-  %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-  %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-  %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-  %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-  %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-  %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-  %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-  %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-  %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-  %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-  %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-  %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-  %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-  %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-  %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-  %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-  %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-  %b0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
-  %b1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
-  %b2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
-  %b3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
-  %b4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
-  %b5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
-  %b6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
-  %b7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
-  %b8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
-  %b9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
-  %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-  %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-  %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-  %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-  %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-  %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-  %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-  %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-  %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-  %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-  %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-  %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-  %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-  %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-  %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-  %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-  %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-  %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-  %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-  %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-  %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-  %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+  %a0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+  %a1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+  %a2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+  %a3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+  %a4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+  %a5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+  %a6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+  %a7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+  %a8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+  %a9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+  %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+  %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+  %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+  %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+  %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+  %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+  %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+  %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+  %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+  %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+  %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+  %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+  %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+  %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+  %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+  %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+  %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+  %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+  %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+  %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+  %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+  %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+  %b0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+  %b1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+  %b2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+  %b3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+  %b4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+  %b5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+  %b6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+  %b7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+  %b8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+  %b9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+  %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+  %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+  %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+  %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+  %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+  %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+  %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+  %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+  %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+  %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+  %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+  %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+  %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+  %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+  %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+  %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+  %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+  %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+  %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+  %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+  %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+  %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
   %c0  = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 %a0 , i16 %b0 )
   %c1  = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 %a1 , i16 %b1 )
   %c2  = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 %a2 , i16 %b2 )
@@ -571,171 +571,171 @@ define void @add_v32i16() {
   %r29 = extractvalue {i16, i1} %c29, 0
   %r30 = extractvalue {i16, i1} %c30, 0
   %r31 = extractvalue {i16, i1} %c31, 0
-  store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
-  store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
-  store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
-  store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
-  store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
-  store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
-  store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
-  store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
-  store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
-  store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
-  store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-  store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-  store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-  store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-  store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-  store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-  store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-  store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-  store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-  store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-  store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-  store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-  store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-  store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-  store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-  store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-  store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-  store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-  store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-  store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-  store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-  store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+  store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+  store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+  store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+  store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+  store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+  store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+  store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+  store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+  store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+  store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+  store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+  store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+  store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+  store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+  store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+  store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+  store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+  store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+  store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+  store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+  store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+  store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+  store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+  store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+  store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+  store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+  store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+  store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+  store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+  store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+  store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+  store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
   ret void
 }
 
 define void @add_v64i8() {
 ; CHECK-LABEL: @add_v64i8(
-; CHECK-NEXT:    [[A0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0), align 1
-; CHECK-NEXT:    [[A1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1), align 1
-; CHECK-NEXT:    [[A2:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2), align 1
-; CHECK-NEXT:    [[A3:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3), align 1
-; CHECK-NEXT:    [[A4:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4), align 1
-; CHECK-NEXT:    [[A5:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5), align 1
-; CHECK-NEXT:    [[A6:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6), align 1
-; CHECK-NEXT:    [[A7:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7), align 1
-; CHECK-NEXT:    [[A8:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8), align 1
-; CHECK-NEXT:    [[A9:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9), align 1
-; CHECK-NEXT:    [[A10:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-; CHECK-NEXT:    [[A11:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-; CHECK-NEXT:    [[A12:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-; CHECK-NEXT:    [[A13:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-; CHECK-NEXT:    [[A14:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-; CHECK-NEXT:    [[A15:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-; CHECK-NEXT:    [[A16:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-; CHECK-NEXT:    [[A17:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-; CHECK-NEXT:    [[A18:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-; CHECK-NEXT:    [[A19:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-; CHECK-NEXT:    [[A20:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-; CHECK-NEXT:    [[A21:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-; CHECK-NEXT:    [[A22:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-; CHECK-NEXT:    [[A23:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-; CHECK-NEXT:    [[A24:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-; CHECK-NEXT:    [[A25:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-; CHECK-NEXT:    [[A26:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-; CHECK-NEXT:    [[A27:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-; CHECK-NEXT:    [[A28:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-; CHECK-NEXT:    [[A29:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-; CHECK-NEXT:    [[A30:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-; CHECK-NEXT:    [[A31:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-; CHECK-NEXT:    [[A32:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-; CHECK-NEXT:    [[A33:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-; CHECK-NEXT:    [[A34:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-; CHECK-NEXT:    [[A35:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-; CHECK-NEXT:    [[A36:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-; CHECK-NEXT:    [[A37:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-; CHECK-NEXT:    [[A38:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-; CHECK-NEXT:    [[A39:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-; CHECK-NEXT:    [[A40:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-; CHECK-NEXT:    [[A41:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-; CHECK-NEXT:    [[A42:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-; CHECK-NEXT:    [[A43:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-; CHECK-NEXT:    [[A44:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-; CHECK-NEXT:    [[A45:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-; CHECK-NEXT:    [[A46:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-; CHECK-NEXT:    [[A47:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-; CHECK-NEXT:    [[A48:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-; CHECK-NEXT:    [[A49:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-; CHECK-NEXT:    [[A50:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-; CHECK-NEXT:    [[A51:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-; CHECK-NEXT:    [[A52:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-; CHECK-NEXT:    [[A53:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-; CHECK-NEXT:    [[A54:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-; CHECK-NEXT:    [[A55:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-; CHECK-NEXT:    [[A56:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-; CHECK-NEXT:    [[A57:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-; CHECK-NEXT:    [[A58:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-; CHECK-NEXT:    [[A59:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-; CHECK-NEXT:    [[A60:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-; CHECK-NEXT:    [[A61:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-; CHECK-NEXT:    [[A62:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-; CHECK-NEXT:    [[A63:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-; CHECK-NEXT:    [[B0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0), align 1
-; CHECK-NEXT:    [[B1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1), align 1
-; CHECK-NEXT:    [[B2:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2), align 1
-; CHECK-NEXT:    [[B3:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3), align 1
-; CHECK-NEXT:    [[B4:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4), align 1
-; CHECK-NEXT:    [[B5:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5), align 1
-; CHECK-NEXT:    [[B6:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6), align 1
-; CHECK-NEXT:    [[B7:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7), align 1
-; CHECK-NEXT:    [[B8:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8), align 1
-; CHECK-NEXT:    [[B9:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9), align 1
-; CHECK-NEXT:    [[B10:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-; CHECK-NEXT:    [[B11:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-; CHECK-NEXT:    [[B12:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-; CHECK-NEXT:    [[B13:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-; CHECK-NEXT:    [[B14:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-; CHECK-NEXT:    [[B15:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-; CHECK-NEXT:    [[B16:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-; CHECK-NEXT:    [[B17:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-; CHECK-NEXT:    [[B18:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-; CHECK-NEXT:    [[B19:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-; CHECK-NEXT:    [[B20:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-; CHECK-NEXT:    [[B21:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-; CHECK-NEXT:    [[B22:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-; CHECK-NEXT:    [[B23:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-; CHECK-NEXT:    [[B24:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-; CHECK-NEXT:    [[B25:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-; CHECK-NEXT:    [[B26:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-; CHECK-NEXT:    [[B27:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-; CHECK-NEXT:    [[B28:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-; CHECK-NEXT:    [[B29:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-; CHECK-NEXT:    [[B30:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-; CHECK-NEXT:    [[B31:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-; CHECK-NEXT:    [[B32:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-; CHECK-NEXT:    [[B33:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-; CHECK-NEXT:    [[B34:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-; CHECK-NEXT:    [[B35:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-; CHECK-NEXT:    [[B36:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-; CHECK-NEXT:    [[B37:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-; CHECK-NEXT:    [[B38:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-; CHECK-NEXT:    [[B39:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-; CHECK-NEXT:    [[B40:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-; CHECK-NEXT:    [[B41:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-; CHECK-NEXT:    [[B42:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-; CHECK-NEXT:    [[B43:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-; CHECK-NEXT:    [[B44:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-; CHECK-NEXT:    [[B45:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-; CHECK-NEXT:    [[B46:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-; CHECK-NEXT:    [[B47:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-; CHECK-NEXT:    [[B48:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-; CHECK-NEXT:    [[B49:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-; CHECK-NEXT:    [[B50:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-; CHECK-NEXT:    [[B51:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-; CHECK-NEXT:    [[B52:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-; CHECK-NEXT:    [[B53:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-; CHECK-NEXT:    [[B54:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-; CHECK-NEXT:    [[B55:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-; CHECK-NEXT:    [[B56:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-; CHECK-NEXT:    [[B57:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-; CHECK-NEXT:    [[B58:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-; CHECK-NEXT:    [[B59:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-; CHECK-NEXT:    [[B60:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-; CHECK-NEXT:    [[B61:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-; CHECK-NEXT:    [[B62:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-; CHECK-NEXT:    [[B63:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+; CHECK-NEXT:    [[A0:%.*]] = load i8, ptr @a8, align 1
+; CHECK-NEXT:    [[A1:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1), align 1
+; CHECK-NEXT:    [[A2:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2), align 1
+; CHECK-NEXT:    [[A3:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3), align 1
+; CHECK-NEXT:    [[A4:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4), align 1
+; CHECK-NEXT:    [[A5:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5), align 1
+; CHECK-NEXT:    [[A6:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6), align 1
+; CHECK-NEXT:    [[A7:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7), align 1
+; CHECK-NEXT:    [[A8:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8), align 1
+; CHECK-NEXT:    [[A9:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9), align 1
+; CHECK-NEXT:    [[A10:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+; CHECK-NEXT:    [[A11:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+; CHECK-NEXT:    [[A12:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+; CHECK-NEXT:    [[A13:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+; CHECK-NEXT:    [[A14:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+; CHECK-NEXT:    [[A15:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+; CHECK-NEXT:    [[A16:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; CHECK-NEXT:    [[A17:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+; CHECK-NEXT:    [[A18:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+; CHECK-NEXT:    [[A19:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+; CHECK-NEXT:    [[A20:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+; CHECK-NEXT:    [[A21:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+; CHECK-NEXT:    [[A22:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+; CHECK-NEXT:    [[A23:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+; CHECK-NEXT:    [[A24:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+; CHECK-NEXT:    [[A25:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+; CHECK-NEXT:    [[A26:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+; CHECK-NEXT:    [[A27:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+; CHECK-NEXT:    [[A28:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+; CHECK-NEXT:    [[A29:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+; CHECK-NEXT:    [[A30:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+; CHECK-NEXT:    [[A31:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+; CHECK-NEXT:    [[A32:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; CHECK-NEXT:    [[A33:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+; CHECK-NEXT:    [[A34:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+; CHECK-NEXT:    [[A35:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+; CHECK-NEXT:    [[A36:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+; CHECK-NEXT:    [[A37:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+; CHECK-NEXT:    [[A38:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+; CHECK-NEXT:    [[A39:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+; CHECK-NEXT:    [[A40:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+; CHECK-NEXT:    [[A41:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+; CHECK-NEXT:    [[A42:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+; CHECK-NEXT:    [[A43:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+; CHECK-NEXT:    [[A44:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+; CHECK-NEXT:    [[A45:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+; CHECK-NEXT:    [[A46:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+; CHECK-NEXT:    [[A47:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+; CHECK-NEXT:    [[A48:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; CHECK-NEXT:    [[A49:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+; CHECK-NEXT:    [[A50:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+; CHECK-NEXT:    [[A51:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+; CHECK-NEXT:    [[A52:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+; CHECK-NEXT:    [[A53:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+; CHECK-NEXT:    [[A54:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+; CHECK-NEXT:    [[A55:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+; CHECK-NEXT:    [[A56:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+; CHECK-NEXT:    [[A57:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+; CHECK-NEXT:    [[A58:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+; CHECK-NEXT:    [[A59:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+; CHECK-NEXT:    [[A60:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+; CHECK-NEXT:    [[A61:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+; CHECK-NEXT:    [[A62:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+; CHECK-NEXT:    [[A63:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+; CHECK-NEXT:    [[B0:%.*]] = load i8, ptr @b8, align 1
+; CHECK-NEXT:    [[B1:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1), align 1
+; CHECK-NEXT:    [[B2:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2), align 1
+; CHECK-NEXT:    [[B3:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3), align 1
+; CHECK-NEXT:    [[B4:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4), align 1
+; CHECK-NEXT:    [[B5:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5), align 1
+; CHECK-NEXT:    [[B6:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6), align 1
+; CHECK-NEXT:    [[B7:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7), align 1
+; CHECK-NEXT:    [[B8:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8), align 1
+; CHECK-NEXT:    [[B9:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9), align 1
+; CHECK-NEXT:    [[B10:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+; CHECK-NEXT:    [[B11:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+; CHECK-NEXT:    [[B12:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+; CHECK-NEXT:    [[B13:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+; CHECK-NEXT:    [[B14:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+; CHECK-NEXT:    [[B15:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+; CHECK-NEXT:    [[B16:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+; CHECK-NEXT:    [[B17:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+; CHECK-NEXT:    [[B18:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+; CHECK-NEXT:    [[B19:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+; CHECK-NEXT:    [[B20:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+; CHECK-NEXT:    [[B21:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+; CHECK-NEXT:    [[B22:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+; CHECK-NEXT:    [[B23:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+; CHECK-NEXT:    [[B24:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+; CHECK-NEXT:    [[B25:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+; CHECK-NEXT:    [[B26:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+; CHECK-NEXT:    [[B27:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+; CHECK-NEXT:    [[B28:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+; CHECK-NEXT:    [[B29:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+; CHECK-NEXT:    [[B30:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+; CHECK-NEXT:    [[B31:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+; CHECK-NEXT:    [[B32:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+; CHECK-NEXT:    [[B33:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+; CHECK-NEXT:    [[B34:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+; CHECK-NEXT:    [[B35:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+; CHECK-NEXT:    [[B36:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+; CHECK-NEXT:    [[B37:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+; CHECK-NEXT:    [[B38:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+; CHECK-NEXT:    [[B39:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+; CHECK-NEXT:    [[B40:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+; CHECK-NEXT:    [[B41:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+; CHECK-NEXT:    [[B42:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+; CHECK-NEXT:    [[B43:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+; CHECK-NEXT:    [[B44:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+; CHECK-NEXT:    [[B45:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+; CHECK-NEXT:    [[B46:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+; CHECK-NEXT:    [[B47:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+; CHECK-NEXT:    [[B48:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+; CHECK-NEXT:    [[B49:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+; CHECK-NEXT:    [[B50:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+; CHECK-NEXT:    [[B51:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+; CHECK-NEXT:    [[B52:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+; CHECK-NEXT:    [[B53:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+; CHECK-NEXT:    [[B54:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+; CHECK-NEXT:    [[B55:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+; CHECK-NEXT:    [[B56:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+; CHECK-NEXT:    [[B57:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+; CHECK-NEXT:    [[B58:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+; CHECK-NEXT:    [[B59:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+; CHECK-NEXT:    [[B60:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+; CHECK-NEXT:    [[B61:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+; CHECK-NEXT:    [[B62:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+; CHECK-NEXT:    [[B63:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
 ; CHECK-NEXT:    [[C0:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[A0]], i8 [[B0]])
 ; CHECK-NEXT:    [[C1:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[A1]], i8 [[B1]])
 ; CHECK-NEXT:    [[C2:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[A2]], i8 [[B2]])
@@ -864,200 +864,200 @@ define void @add_v64i8() {
 ; CHECK-NEXT:    [[R61:%.*]] = extractvalue { i8, i1 } [[C61]], 0
 ; CHECK-NEXT:    [[R62:%.*]] = extractvalue { i8, i1 } [[C62]], 0
 ; CHECK-NEXT:    [[R63:%.*]] = extractvalue { i8, i1 } [[C63]], 0
-; CHECK-NEXT:    store i8 [[R0]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0), align 1
-; CHECK-NEXT:    store i8 [[R1]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1), align 1
-; CHECK-NEXT:    store i8 [[R2]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2), align 1
-; CHECK-NEXT:    store i8 [[R3]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3), align 1
-; CHECK-NEXT:    store i8 [[R4]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4), align 1
-; CHECK-NEXT:    store i8 [[R5]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5), align 1
-; CHECK-NEXT:    store i8 [[R6]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6), align 1
-; CHECK-NEXT:    store i8 [[R7]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7), align 1
-; CHECK-NEXT:    store i8 [[R8]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8), align 1
-; CHECK-NEXT:    store i8 [[R9]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9), align 1
-; CHECK-NEXT:    store i8 [[R10]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-; CHECK-NEXT:    store i8 [[R11]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-; CHECK-NEXT:    store i8 [[R12]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-; CHECK-NEXT:    store i8 [[R13]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-; CHECK-NEXT:    store i8 [[R14]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-; CHECK-NEXT:    store i8 [[R15]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-; CHECK-NEXT:    store i8 [[R16]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-; CHECK-NEXT:    store i8 [[R17]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-; CHECK-NEXT:    store i8 [[R18]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-; CHECK-NEXT:    store i8 [[R19]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-; CHECK-NEXT:    store i8 [[R20]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-; CHECK-NEXT:    store i8 [[R21]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-; CHECK-NEXT:    store i8 [[R22]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-; CHECK-NEXT:    store i8 [[R23]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-; CHECK-NEXT:    store i8 [[R24]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-; CHECK-NEXT:    store i8 [[R25]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-; CHECK-NEXT:    store i8 [[R26]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-; CHECK-NEXT:    store i8 [[R27]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-; CHECK-NEXT:    store i8 [[R28]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-; CHECK-NEXT:    store i8 [[R29]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-; CHECK-NEXT:    store i8 [[R30]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-; CHECK-NEXT:    store i8 [[R31]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-; CHECK-NEXT:    store i8 [[R32]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-; CHECK-NEXT:    store i8 [[R33]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-; CHECK-NEXT:    store i8 [[R34]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-; CHECK-NEXT:    store i8 [[R35]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-; CHECK-NEXT:    store i8 [[R36]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-; CHECK-NEXT:    store i8 [[R37]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-; CHECK-NEXT:    store i8 [[R38]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-; CHECK-NEXT:    store i8 [[R39]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-; CHECK-NEXT:    store i8 [[R40]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-; CHECK-NEXT:    store i8 [[R41]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-; CHECK-NEXT:    store i8 [[R42]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-; CHECK-NEXT:    store i8 [[R43]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-; CHECK-NEXT:    store i8 [[R44]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-; CHECK-NEXT:    store i8 [[R45]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-; CHECK-NEXT:    store i8 [[R46]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-; CHECK-NEXT:    store i8 [[R47]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-; CHECK-NEXT:    store i8 [[R48]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-; CHECK-NEXT:    store i8 [[R49]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-; CHECK-NEXT:    store i8 [[R50]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-; CHECK-NEXT:    store i8 [[R51]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-; CHECK-NEXT:    store i8 [[R52]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-; CHECK-NEXT:    store i8 [[R53]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-; CHECK-NEXT:    store i8 [[R54]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-; CHECK-NEXT:    store i8 [[R55]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-; CHECK-NEXT:    store i8 [[R56]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-; CHECK-NEXT:    store i8 [[R57]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-; CHECK-NEXT:    store i8 [[R58]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-; CHECK-NEXT:    store i8 [[R59]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-; CHECK-NEXT:    store i8 [[R60]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-; CHECK-NEXT:    store i8 [[R61]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-; CHECK-NEXT:    store i8 [[R62]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-; CHECK-NEXT:    store i8 [[R63]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+; CHECK-NEXT:    store i8 [[R0]], ptr @c8, align 1
+; CHECK-NEXT:    store i8 [[R1]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1), align 1
+; CHECK-NEXT:    store i8 [[R2]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2), align 1
+; CHECK-NEXT:    store i8 [[R3]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3), align 1
+; CHECK-NEXT:    store i8 [[R4]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4), align 1
+; CHECK-NEXT:    store i8 [[R5]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5), align 1
+; CHECK-NEXT:    store i8 [[R6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6), align 1
+; CHECK-NEXT:    store i8 [[R7]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7), align 1
+; CHECK-NEXT:    store i8 [[R8]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8), align 1
+; CHECK-NEXT:    store i8 [[R9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9), align 1
+; CHECK-NEXT:    store i8 [[R10]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+; CHECK-NEXT:    store i8 [[R11]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+; CHECK-NEXT:    store i8 [[R12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+; CHECK-NEXT:    store i8 [[R13]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+; CHECK-NEXT:    store i8 [[R14]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+; CHECK-NEXT:    store i8 [[R15]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+; CHECK-NEXT:    store i8 [[R16]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; CHECK-NEXT:    store i8 [[R17]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+; CHECK-NEXT:    store i8 [[R18]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+; CHECK-NEXT:    store i8 [[R19]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+; CHECK-NEXT:    store i8 [[R20]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+; CHECK-NEXT:    store i8 [[R21]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+; CHECK-NEXT:    store i8 [[R22]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+; CHECK-NEXT:    store i8 [[R23]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+; CHECK-NEXT:    store i8 [[R24]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+; CHECK-NEXT:    store i8 [[R25]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+; CHECK-NEXT:    store i8 [[R26]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+; CHECK-NEXT:    store i8 [[R27]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+; CHECK-NEXT:    store i8 [[R28]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+; CHECK-NEXT:    store i8 [[R29]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+; CHECK-NEXT:    store i8 [[R30]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+; CHECK-NEXT:    store i8 [[R31]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+; CHECK-NEXT:    store i8 [[R32]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; CHECK-NEXT:    store i8 [[R33]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+; CHECK-NEXT:    store i8 [[R34]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+; CHECK-NEXT:    store i8 [[R35]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+; CHECK-NEXT:    store i8 [[R36]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+; CHECK-NEXT:    store i8 [[R37]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+; CHECK-NEXT:    store i8 [[R38]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+; CHECK-NEXT:    store i8 [[R39]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+; CHECK-NEXT:    store i8 [[R40]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+; CHECK-NEXT:    store i8 [[R41]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+; CHECK-NEXT:    store i8 [[R42]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+; CHECK-NEXT:    store i8 [[R43]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+; CHECK-NEXT:    store i8 [[R44]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+; CHECK-NEXT:    store i8 [[R45]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+; CHECK-NEXT:    store i8 [[R46]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+; CHECK-NEXT:    store i8 [[R47]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+; CHECK-NEXT:    store i8 [[R48]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+; CHECK-NEXT:    store i8 [[R49]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+; CHECK-NEXT:    store i8 [[R50]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+; CHECK-NEXT:    store i8 [[R51]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+; CHECK-NEXT:    store i8 [[R52]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+; CHECK-NEXT:    store i8 [[R53]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+; CHECK-NEXT:    store i8 [[R54]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+; CHECK-NEXT:    store i8 [[R55]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+; CHECK-NEXT:    store i8 [[R56]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+; CHECK-NEXT:    store i8 [[R57]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+; CHECK-NEXT:    store i8 [[R58]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+; CHECK-NEXT:    store i8 [[R59]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+; CHECK-NEXT:    store i8 [[R60]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+; CHECK-NEXT:    store i8 [[R61]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+; CHECK-NEXT:    store i8 [[R62]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+; CHECK-NEXT:    store i8 [[R63]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
 ; CHECK-NEXT:    ret void
 ;
-  %a0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
-  %a1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
-  %a2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
-  %a3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
-  %a4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
-  %a5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
-  %a6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
-  %a7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
-  %a8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
-  %a9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
-  %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-  %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-  %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-  %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-  %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-  %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-  %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-  %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-  %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-  %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-  %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-  %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-  %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-  %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-  %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-  %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-  %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-  %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-  %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-  %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-  %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-  %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-  %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-  %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-  %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-  %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-  %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-  %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-  %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-  %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-  %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-  %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-  %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-  %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-  %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-  %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-  %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-  %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-  %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-  %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-  %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-  %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-  %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-  %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-  %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-  %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-  %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-  %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-  %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-  %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-  %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-  %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-  %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-  %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-  %b0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
-  %b1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
-  %b2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
-  %b3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
-  %b4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
-  %b5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
-  %b6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
-  %b7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
-  %b8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
-  %b9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
-  %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-  %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-  %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-  %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-  %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-  %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-  %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-  %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-  %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-  %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-  %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-  %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-  %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-  %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-  %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-  %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-  %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-  %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-  %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-  %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-  %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-  %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-  %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-  %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-  %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-  %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-  %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-  %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-  %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-  %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-  %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-  %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-  %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-  %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-  %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-  %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-  %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-  %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-  %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-  %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-  %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-  %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-  %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-  %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-  %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-  %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-  %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-  %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-  %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-  %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-  %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-  %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-  %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-  %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+  %a0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+  %a1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+  %a2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+  %a3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+  %a4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+  %a5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+  %a6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+  %a7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+  %a8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+  %a9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+  %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+  %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+  %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+  %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+  %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+  %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+  %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+  %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+  %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+  %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+  %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+  %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+  %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+  %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+  %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+  %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+  %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+  %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+  %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+  %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+  %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+  %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+  %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+  %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+  %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+  %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+  %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+  %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+  %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+  %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+  %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+  %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+  %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+  %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+  %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+  %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+  %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+  %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+  %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+  %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+  %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+  %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+  %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+  %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+  %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+  %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+  %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+  %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+  %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+  %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+  %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+  %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+  %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+  %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+  %b0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+  %b1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+  %b2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+  %b3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+  %b4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+  %b5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+  %b6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+  %b7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+  %b8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+  %b9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+  %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+  %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+  %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+  %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+  %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+  %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+  %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+  %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+  %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+  %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+  %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+  %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+  %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+  %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+  %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+  %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+  %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+  %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+  %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+  %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+  %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+  %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+  %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+  %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+  %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+  %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+  %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+  %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+  %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+  %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+  %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+  %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+  %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+  %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+  %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+  %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+  %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+  %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+  %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+  %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+  %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+  %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+  %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+  %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+  %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+  %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+  %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+  %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+  %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+  %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+  %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+  %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+  %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+  %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
   %c0  = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a0 , i8 %b0 )
   %c1  = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a1 , i8 %b1 )
   %c2  = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a2 , i8 %b2 )
@@ -1186,69 +1186,69 @@ define void @add_v64i8() {
   %r61 = extractvalue {i8, i1} %c61, 0
   %r62 = extractvalue {i8, i1} %c62, 0
   %r63 = extractvalue {i8, i1} %c63, 0
-  store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
-  store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
-  store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
-  store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
-  store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
-  store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
-  store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
-  store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
-  store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
-  store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
-  store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-  store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-  store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-  store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-  store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-  store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-  store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-  store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-  store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-  store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-  store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-  store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-  store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-  store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-  store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-  store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-  store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-  store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-  store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-  store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-  store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-  store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-  store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-  store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-  store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-  store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-  store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-  store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-  store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-  store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-  store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-  store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-  store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-  store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-  store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-  store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-  store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-  store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-  store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-  store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-  store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-  store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-  store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-  store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-  store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-  store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-  store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-  store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-  store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-  store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-  store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-  store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-  store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-  store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+  store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+  store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+  store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+  store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+  store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+  store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+  store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+  store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+  store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+  store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+  store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+  store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+  store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+  store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+  store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+  store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+  store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+  store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+  store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+  store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+  store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+  store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+  store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+  store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+  store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+  store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+  store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+  store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+  store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+  store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+  store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+  store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+  store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+  store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+  store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+  store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+  store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+  store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+  store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+  store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+  store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+  store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+  store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+  store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+  store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+  store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+  store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+  store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+  store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+  store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+  store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+  store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+  store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+  store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+  store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+  store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+  store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+  store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+  store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+  store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+  store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+  store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+  store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+  store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
   ret void
 }

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-usat.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-usat.ll
index eddd39e15d696..fab022d691c07 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-usat.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-usat.ll
@@ -27,22 +27,22 @@ declare i8  @llvm.uadd.sat.i8 (i8 , i8 )
 
 define void @add_v8i64() {
 ; SSE-LABEL: @add_v8i64(
-; SSE-NEXT:    [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-; SSE-NEXT:    [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-; SSE-NEXT:    [[A2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-; SSE-NEXT:    [[A3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-; SSE-NEXT:    [[A4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-; SSE-NEXT:    [[A5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-; SSE-NEXT:    [[A6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-; SSE-NEXT:    [[A7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-; SSE-NEXT:    [[B0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-; SSE-NEXT:    [[B1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-; SSE-NEXT:    [[B2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-; SSE-NEXT:    [[B3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-; SSE-NEXT:    [[B4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-; SSE-NEXT:    [[B5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-; SSE-NEXT:    [[B6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-; SSE-NEXT:    [[B7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+; SSE-NEXT:    [[A0:%.*]] = load i64, ptr @a64, align 8
+; SSE-NEXT:    [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+; SSE-NEXT:    [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+; SSE-NEXT:    [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+; SSE-NEXT:    [[B0:%.*]] = load i64, ptr @b64, align 8
+; SSE-NEXT:    [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+; SSE-NEXT:    [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+; SSE-NEXT:    [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
 ; SSE-NEXT:    [[R0:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[A0]], i64 [[B0]])
 ; SSE-NEXT:    [[R1:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[A1]], i64 [[B1]])
 ; SSE-NEXT:    [[R2:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[A2]], i64 [[B2]])
@@ -51,50 +51,50 @@ define void @add_v8i64() {
 ; SSE-NEXT:    [[R5:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[A5]], i64 [[B5]])
 ; SSE-NEXT:    [[R6:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[A6]], i64 [[B6]])
 ; SSE-NEXT:    [[R7:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[A7]], i64 [[B7]])
-; SSE-NEXT:    store i64 [[R0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-; SSE-NEXT:    store i64 [[R1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-; SSE-NEXT:    store i64 [[R2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-; SSE-NEXT:    store i64 [[R3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-; SSE-NEXT:    store i64 [[R4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-; SSE-NEXT:    store i64 [[R5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-; SSE-NEXT:    store i64 [[R6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-; SSE-NEXT:    store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+; SSE-NEXT:    store i64 [[R0]], ptr @c64, align 8
+; SSE-NEXT:    store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+; SSE-NEXT:    store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SSE-NEXT:    store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+; SSE-NEXT:    store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SSE-NEXT:    store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+; SSE-NEXT:    store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+; SSE-NEXT:    store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @add_v8i64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
 ; AVX-NEXT:    [[TMP3:%.*]] = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]])
-; AVX-NEXT:    store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP3]], ptr @c64, align 8
+; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX-NEXT:    [[TMP6:%.*]] = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]])
-; AVX-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @add_v8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
+; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]])
-; AVX512-NEXT:    store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
+; AVX512-NEXT:    store <8 x i64> [[TMP3]], ptr @c64, align 8
 ; AVX512-NEXT:    ret void
 ;
-  %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-  %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-  %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-  %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-  %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-  %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-  %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-  %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-  %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-  %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-  %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-  %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-  %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-  %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-  %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-  %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+  %a0 = load i64, ptr @a64, align 8
+  %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+  %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+  %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+  %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+  %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+  %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+  %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+  %b0 = load i64, ptr @b64, align 8
+  %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+  %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+  %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+  %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+  %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+  %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+  %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
   %r0 = call i64 @llvm.uadd.sat.i64(i64 %a0, i64 %b0)
   %r1 = call i64 @llvm.uadd.sat.i64(i64 %a1, i64 %b1)
   %r2 = call i64 @llvm.uadd.sat.i64(i64 %a2, i64 %b2)
@@ -103,87 +103,87 @@ define void @add_v8i64() {
   %r5 = call i64 @llvm.uadd.sat.i64(i64 %a5, i64 %b5)
   %r6 = call i64 @llvm.uadd.sat.i64(i64 %a6, i64 %b6)
   %r7 = call i64 @llvm.uadd.sat.i64(i64 %a7, i64 %b7)
-  store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-  store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-  store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-  store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-  store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-  store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-  store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-  store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+  store i64 %r0, ptr @c64, align 8
+  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @add_v16i32() {
 ; SSE-LABEL: @add_v16i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-; SSE-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP3]], ptr @c32, align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]])
-; SSE-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP9:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]])
-; SSE-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP12:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]])
-; SSE-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @add_v16i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
 ; AVX-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
-; AVX-NEXT:    store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP3]], ptr @c32, align 4
+; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; AVX-NEXT:    [[TMP6:%.*]] = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> [[TMP4]], <8 x i32> [[TMP5]])
-; AVX-NEXT:    store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @add_v16i32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @b32 to <16 x i32>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
-; AVX512-NEXT:    store <16 x i32> [[TMP3]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4
+; AVX512-NEXT:    store <16 x i32> [[TMP3]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-  %b0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
-  %b1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
-  %b2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
-  %b3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
-  %b4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
-  %b5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
-  %b6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
-  %b7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
-  %b8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
-  %b9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
-  %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-  %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-  %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-  %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-  %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-  %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+  %b0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+  %b1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+  %b2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+  %b3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+  %b4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+  %b5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+  %b6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+  %b7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+  %b8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+  %b9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+  %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+  %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+  %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+  %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+  %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+  %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
   %r0  = call i32 @llvm.uadd.sat.i32(i32 %a0 , i32 %b0 )
   %r1  = call i32 @llvm.uadd.sat.i32(i32 %a1 , i32 %b1 )
   %r2  = call i32 @llvm.uadd.sat.i32(i32 %a2 , i32 %b2 )
@@ -200,127 +200,127 @@ define void @add_v16i32() {
   %r13 = call i32 @llvm.uadd.sat.i32(i32 %a13, i32 %b13)
   %r14 = call i32 @llvm.uadd.sat.i32(i32 %a14, i32 %b14)
   %r15 = call i32 @llvm.uadd.sat.i32(i32 %a15, i32 %b15)
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @add_v32i16() {
 ; SSE-LABEL: @add_v32i16(
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
 ; SSE-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-; SSE-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
 ; SSE-NEXT:    [[TMP6:%.*]] = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]])
-; SSE-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; SSE-NEXT:    [[TMP9:%.*]] = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]])
-; SSE-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
 ; SSE-NEXT:    [[TMP12:%.*]] = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]])
-; SSE-NEXT:    store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @add_v32i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
+; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
 ; AVX-NEXT:    [[TMP3:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
-; AVX-NEXT:    store <16 x i16> [[TMP3]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP3]], ptr @c16, align 2
+; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; AVX-NEXT:    [[TMP6:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[TMP4]], <16 x i16> [[TMP5]])
-; AVX-NEXT:    store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @add_v32i16(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @a16 to <32 x i16>*), align 2
-; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @b16 to <32 x i16>*), align 2
+; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
+; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
-; AVX512-NEXT:    store <32 x i16> [[TMP3]], <32 x i16>* bitcast ([32 x i16]* @c16 to <32 x i16>*), align 2
+; AVX512-NEXT:    store <32 x i16> [[TMP3]], ptr @c16, align 2
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
-  %a1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
-  %a2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
-  %a3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
-  %a4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
-  %a5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
-  %a6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
-  %a7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
-  %a8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
-  %a9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
-  %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-  %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-  %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-  %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-  %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-  %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-  %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-  %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-  %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-  %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-  %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-  %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-  %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-  %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-  %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-  %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-  %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-  %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-  %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-  %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-  %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-  %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-  %b0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
-  %b1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
-  %b2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
-  %b3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
-  %b4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
-  %b5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
-  %b6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
-  %b7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
-  %b8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
-  %b9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
-  %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-  %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-  %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-  %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-  %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-  %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-  %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-  %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-  %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-  %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-  %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-  %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-  %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-  %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-  %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-  %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-  %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-  %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-  %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-  %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-  %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-  %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+  %a0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+  %a1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+  %a2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+  %a3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+  %a4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+  %a5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+  %a6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+  %a7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+  %a8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+  %a9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+  %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+  %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+  %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+  %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+  %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+  %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+  %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+  %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+  %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+  %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+  %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+  %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+  %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+  %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+  %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+  %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+  %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+  %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+  %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+  %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+  %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+  %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+  %b0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+  %b1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+  %b2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+  %b3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+  %b4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+  %b5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+  %b6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+  %b7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+  %b8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+  %b9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+  %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+  %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+  %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+  %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+  %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+  %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+  %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+  %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+  %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+  %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+  %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+  %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+  %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+  %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+  %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+  %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+  %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+  %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+  %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+  %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+  %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+  %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
   %r0  = call i16 @llvm.uadd.sat.i16(i16 %a0 , i16 %b0 )
   %r1  = call i16 @llvm.uadd.sat.i16(i16 %a1 , i16 %b1 )
   %r2  = call i16 @llvm.uadd.sat.i16(i16 %a2 , i16 %b2 )
@@ -353,207 +353,207 @@ define void @add_v32i16() {
   %r29 = call i16 @llvm.uadd.sat.i16(i16 %a29, i16 %b29)
   %r30 = call i16 @llvm.uadd.sat.i16(i16 %a30, i16 %b30)
   %r31 = call i16 @llvm.uadd.sat.i16(i16 %a31, i16 %b31)
-  store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
-  store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
-  store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
-  store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
-  store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
-  store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
-  store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
-  store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
-  store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
-  store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
-  store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-  store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-  store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-  store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-  store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-  store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-  store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-  store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-  store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-  store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-  store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-  store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-  store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-  store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-  store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-  store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-  store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-  store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-  store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-  store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-  store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-  store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+  store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+  store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+  store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+  store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+  store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+  store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+  store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+  store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+  store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+  store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+  store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+  store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+  store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+  store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+  store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+  store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+  store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+  store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+  store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+  store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+  store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+  store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+  store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+  store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+  store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+  store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+  store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+  store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+  store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+  store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+  store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+  store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
   ret void
 }
 
 define void @add_v64i8() {
 ; SSE-LABEL: @add_v64i8(
-; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-; SSE-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
 ; SSE-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
-; SSE-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; SSE-NEXT:    [[TMP9:%.*]] = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
-; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
 ; SSE-NEXT:    [[TMP12:%.*]] = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]])
-; SSE-NEXT:    store <16 x i8> [[TMP9]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @add_v64i8(
-; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @a8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @b8 to <32 x i8>*), align 1
+; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> [[TMP1]], <32 x i8> [[TMP2]])
-; AVX-NEXT:    store <32 x i8> [[TMP3]], <32 x i8>* bitcast ([64 x i8]* @c8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP3]], ptr @c8, align 1
+; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; AVX-NEXT:    [[TMP6:%.*]] = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> [[TMP4]], <32 x i8> [[TMP5]])
-; AVX-NEXT:    store <32 x i8> [[TMP6]], <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @add_v64i8(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @a8 to <64 x i8>*), align 1
-; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @b8 to <64 x i8>*), align 1
+; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
+; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> [[TMP1]], <64 x i8> [[TMP2]])
-; AVX512-NEXT:    store <64 x i8> [[TMP3]], <64 x i8>* bitcast ([64 x i8]* @c8 to <64 x i8>*), align 1
+; AVX512-NEXT:    store <64 x i8> [[TMP3]], ptr @c8, align 1
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
-  %a1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
-  %a2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
-  %a3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
-  %a4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
-  %a5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
-  %a6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
-  %a7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
-  %a8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
-  %a9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
-  %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-  %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-  %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-  %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-  %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-  %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-  %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-  %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-  %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-  %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-  %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-  %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-  %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-  %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-  %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-  %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-  %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-  %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-  %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-  %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-  %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-  %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-  %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-  %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-  %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-  %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-  %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-  %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-  %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-  %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-  %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-  %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-  %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-  %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-  %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-  %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-  %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-  %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-  %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-  %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-  %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-  %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-  %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-  %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-  %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-  %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-  %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-  %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-  %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-  %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-  %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-  %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-  %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-  %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-  %b0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
-  %b1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
-  %b2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
-  %b3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
-  %b4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
-  %b5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
-  %b6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
-  %b7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
-  %b8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
-  %b9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
-  %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-  %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-  %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-  %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-  %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-  %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-  %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-  %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-  %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-  %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-  %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-  %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-  %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-  %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-  %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-  %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-  %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-  %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-  %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-  %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-  %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-  %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-  %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-  %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-  %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-  %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-  %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-  %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-  %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-  %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-  %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-  %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-  %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-  %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-  %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-  %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-  %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-  %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-  %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-  %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-  %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-  %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-  %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-  %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-  %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-  %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-  %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-  %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-  %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-  %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-  %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-  %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-  %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-  %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+  %a0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+  %a1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+  %a2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+  %a3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+  %a4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+  %a5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+  %a6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+  %a7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+  %a8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+  %a9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+  %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+  %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+  %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+  %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+  %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+  %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+  %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+  %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+  %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+  %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+  %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+  %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+  %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+  %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+  %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+  %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+  %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+  %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+  %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+  %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+  %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+  %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+  %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+  %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+  %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+  %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+  %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+  %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+  %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+  %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+  %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+  %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+  %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+  %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+  %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+  %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+  %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+  %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+  %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+  %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+  %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+  %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+  %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+  %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+  %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+  %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+  %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+  %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+  %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+  %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+  %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+  %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+  %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+  %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+  %b0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+  %b1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+  %b2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+  %b3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+  %b4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+  %b5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+  %b6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+  %b7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+  %b8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+  %b9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+  %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+  %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+  %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+  %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+  %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+  %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+  %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+  %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+  %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+  %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+  %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+  %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+  %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+  %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+  %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+  %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+  %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+  %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+  %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+  %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+  %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+  %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+  %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+  %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+  %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+  %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+  %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+  %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+  %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+  %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+  %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+  %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+  %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+  %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+  %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+  %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+  %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+  %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+  %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+  %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+  %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+  %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+  %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+  %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+  %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+  %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+  %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+  %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+  %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+  %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+  %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+  %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+  %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+  %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
   %r0  = call i8 @llvm.uadd.sat.i8(i8 %a0 , i8 %b0 )
   %r1  = call i8 @llvm.uadd.sat.i8(i8 %a1 , i8 %b1 )
   %r2  = call i8 @llvm.uadd.sat.i8(i8 %a2 , i8 %b2 )
@@ -618,69 +618,69 @@ define void @add_v64i8() {
   %r61 = call i8 @llvm.uadd.sat.i8(i8 %a61, i8 %b61)
   %r62 = call i8 @llvm.uadd.sat.i8(i8 %a62, i8 %b62)
   %r63 = call i8 @llvm.uadd.sat.i8(i8 %a63, i8 %b63)
-  store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
-  store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
-  store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
-  store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
-  store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
-  store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
-  store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
-  store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
-  store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
-  store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
-  store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-  store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-  store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-  store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-  store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-  store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-  store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-  store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-  store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-  store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-  store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-  store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-  store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-  store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-  store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-  store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-  store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-  store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-  store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-  store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-  store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-  store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-  store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-  store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-  store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-  store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-  store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-  store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-  store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-  store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-  store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-  store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-  store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-  store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-  store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-  store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-  store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-  store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-  store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-  store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-  store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-  store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-  store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-  store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-  store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-  store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-  store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-  store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-  store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-  store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-  store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-  store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-  store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-  store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+  store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+  store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+  store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+  store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+  store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+  store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+  store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+  store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+  store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+  store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+  store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+  store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+  store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+  store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+  store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+  store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+  store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+  store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+  store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+  store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+  store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+  store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+  store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+  store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+  store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+  store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+  store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+  store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+  store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+  store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+  store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+  store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+  store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+  store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+  store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+  store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+  store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+  store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+  store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+  store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+  store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+  store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+  store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+  store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+  store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+  store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+  store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+  store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+  store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+  store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+  store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+  store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+  store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+  store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+  store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+  store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+  store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+  store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+  store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+  store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+  store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+  store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+  store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+  store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
   ret void
 }
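
For reference, the mechanical pattern behind all of the changes in these
tests: with opaque pointers, a bitcast constant expression between pointer
types is a no-op and folds away, and a getelementptr whose indices are all
zero folds to the base pointer itself. A minimal before/after sketch, using
a hypothetical global @g rather than one of the test arrays:

  ; Typed pointers: the global must be cast to the access type, and even
  ; element 0 needs an explicit getelementptr.
  %v = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @g to <2 x i64>*), align 8
  store i64 %x, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @g, i32 0, i64 0), align 8

  ; Opaque pointers: the bitcast disappears and the zero-index
  ; getelementptr folds to @g; non-zero offsets keep their
  ; getelementptr, now spelled with ptr operands.
  %v = load <2 x i64>, ptr @g, align 8
  store i64 %x, ptr @g, align 8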

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-add.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-add.ll
index d4fd9df8971be..dafed43e6e71c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-add.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-add.ll
@@ -24,77 +24,77 @@
 
 define void @add_v8i64() {
 ; SSE-LABEL: @add_v8i64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
+; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
 ; SSE-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
-; SSE-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP3]], ptr @c64, align 8
+; SSE-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; SSE-NEXT:    [[TMP9:%.*]] = add <2 x i64> [[TMP7]], [[TMP8]]
-; SSE-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP11:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
 ; SSE-NEXT:    [[TMP12:%.*]] = add <2 x i64> [[TMP10]], [[TMP11]]
-; SSE-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @add_v8i64(
-; SLM-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
+; SLM-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
+; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
 ; SLM-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
-; SLM-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
+; SLM-NEXT:    store <2 x i64> [[TMP3]], ptr @c64, align 8
+; SLM-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SLM-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
 ; SLM-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
+; SLM-NEXT:    store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SLM-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SLM-NEXT:    [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; SLM-NEXT:    [[TMP9:%.*]] = add <2 x i64> [[TMP7]], [[TMP8]]
-; SLM-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP11:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
+; SLM-NEXT:    store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SLM-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SLM-NEXT:    [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
 ; SLM-NEXT:    [[TMP12:%.*]] = add <2 x i64> [[TMP10]], [[TMP11]]
-; SLM-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
+; SLM-NEXT:    store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @add_v8i64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
 ; AVX-NEXT:    [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]]
-; AVX-NEXT:    store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP3]], ptr @c64, align 8
+; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX-NEXT:    [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
-; AVX-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @add_v8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
+; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
 ; AVX512-NEXT:    [[TMP3:%.*]] = add <8 x i64> [[TMP1]], [[TMP2]]
-; AVX512-NEXT:    store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
+; AVX512-NEXT:    store <8 x i64> [[TMP3]], ptr @c64, align 8
 ; AVX512-NEXT:    ret void
 ;
-  %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-  %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-  %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-  %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-  %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-  %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-  %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-  %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-  %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-  %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-  %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-  %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-  %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-  %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-  %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-  %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+  %a0 = load i64, ptr @a64, align 8
+  %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+  %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+  %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+  %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+  %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+  %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+  %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+  %b0 = load i64, ptr @b64, align 8
+  %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+  %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+  %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+  %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+  %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+  %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+  %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
   %r0 = add i64 %a0, %b0
   %r1 = add i64 %a1, %b1
   %r2 = add i64 %a2, %b2
@@ -103,106 +103,106 @@ define void @add_v8i64() {
   %r5 = add i64 %a5, %b5
   %r6 = add i64 %a6, %b6
   %r7 = add i64 %a7, %b7
-  store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-  store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-  store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-  store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-  store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-  store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-  store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-  store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+  store i64 %r0, ptr @c64, align 8
+  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @add_v16i32() {
 ; SSE-LABEL: @add_v16i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
-; SSE-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP3]], ptr @c32, align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = add <4 x i32> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP9:%.*]] = add <4 x i32> [[TMP7]], [[TMP8]]
-; SSE-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP12:%.*]] = add <4 x i32> [[TMP10]], [[TMP11]]
-; SSE-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @add_v16i32(
-; SLM-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
+; SLM-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
+; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
 ; SLM-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
-; SLM-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP3]], ptr @c32, align 4
+; SLM-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SLM-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
 ; SLM-NEXT:    [[TMP6:%.*]] = add <4 x i32> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SLM-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SLM-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; SLM-NEXT:    [[TMP9:%.*]] = add <4 x i32> [[TMP7]], [[TMP8]]
-; SLM-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SLM-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SLM-NEXT:    [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
 ; SLM-NEXT:    [[TMP12:%.*]] = add <4 x i32> [[TMP10]], [[TMP11]]
-; SLM-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @add_v16i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
 ; AVX-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]]
-; AVX-NEXT:    store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP3]], ptr @c32, align 4
+; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; AVX-NEXT:    [[TMP6:%.*]] = add <8 x i32> [[TMP4]], [[TMP5]]
-; AVX-NEXT:    store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @add_v16i32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @b32 to <16 x i32>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
 ; AVX512-NEXT:    [[TMP3:%.*]] = add <16 x i32> [[TMP1]], [[TMP2]]
-; AVX512-NEXT:    store <16 x i32> [[TMP3]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4
+; AVX512-NEXT:    store <16 x i32> [[TMP3]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-  %b0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
-  %b1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
-  %b2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
-  %b3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
-  %b4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
-  %b5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
-  %b6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
-  %b7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
-  %b8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
-  %b9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
-  %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-  %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-  %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-  %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-  %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-  %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+  %b0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+  %b1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+  %b2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+  %b3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+  %b4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+  %b5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+  %b6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+  %b7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+  %b8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+  %b9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+  %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+  %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+  %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+  %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+  %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+  %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
   %r0  = add i32 %a0 , %b0
   %r1  = add i32 %a1 , %b1
   %r2  = add i32 %a2 , %b2
@@ -219,146 +219,146 @@ define void @add_v16i32() {
   %r13 = add i32 %a13, %b13
   %r14 = add i32 %a14, %b14
   %r15 = add i32 %a15, %b15
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
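
(The hunks above are all instances of one mechanical rewrite: with opaque pointers the pointee type is carried by the load/store instruction itself, so the bitcast constant expressions that retyped the globals become redundant and are dropped. A minimal sketch of the pattern, using a hypothetical global @g that is not part of this test file:

    @g = global [4 x i32] zeroinitializer

    ; typed pointers (before):
    %v = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @g to <4 x i32>*), align 4

    ; opaque pointers (after):
    %v = load <4 x i32>, ptr @g, align 4

The getelementptr constant expressions survive the conversion and keep their source element type, e.g. [16 x i32], but now take and return plain ptr, as in the CHECK lines above; the vectorization factors chosen per check prefix are unchanged.)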
 
 define void @add_v32i16() {
 ; SSE-LABEL: @add_v32i16(
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
 ; SSE-NEXT:    [[TMP3:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]]
-; SSE-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
 ; SSE-NEXT:    [[TMP6:%.*]] = add <8 x i16> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; SSE-NEXT:    [[TMP9:%.*]] = add <8 x i16> [[TMP7]], [[TMP8]]
-; SSE-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
 ; SSE-NEXT:    [[TMP12:%.*]] = add <8 x i16> [[TMP10]], [[TMP11]]
-; SSE-NEXT:    store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @add_v32i16(
-; SLM-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SLM-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
 ; SLM-NEXT:    [[TMP3:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]]
-; SLM-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SLM-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SLM-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
 ; SLM-NEXT:    [[TMP6:%.*]] = add <8 x i16> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SLM-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SLM-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; SLM-NEXT:    [[TMP9:%.*]] = add <8 x i16> [[TMP7]], [[TMP8]]
-; SLM-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SLM-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SLM-NEXT:    [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
 ; SLM-NEXT:    [[TMP12:%.*]] = add <8 x i16> [[TMP10]], [[TMP11]]
-; SLM-NEXT:    store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @add_v32i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
+; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
 ; AVX-NEXT:    [[TMP3:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
-; AVX-NEXT:    store <16 x i16> [[TMP3]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP3]], ptr @c16, align 2
+; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; AVX-NEXT:    [[TMP6:%.*]] = add <16 x i16> [[TMP4]], [[TMP5]]
-; AVX-NEXT:    store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @add_v32i16(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @a16 to <32 x i16>*), align 2
-; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @b16 to <32 x i16>*), align 2
+; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
+; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
 ; AVX512-NEXT:    [[TMP3:%.*]] = add <32 x i16> [[TMP1]], [[TMP2]]
-; AVX512-NEXT:    store <32 x i16> [[TMP3]], <32 x i16>* bitcast ([32 x i16]* @c16 to <32 x i16>*), align 2
+; AVX512-NEXT:    store <32 x i16> [[TMP3]], ptr @c16, align 2
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
-  %a1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
-  %a2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
-  %a3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
-  %a4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
-  %a5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
-  %a6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
-  %a7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
-  %a8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
-  %a9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
-  %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-  %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-  %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-  %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-  %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-  %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-  %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-  %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-  %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-  %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-  %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-  %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-  %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-  %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-  %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-  %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-  %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-  %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-  %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-  %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-  %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-  %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-  %b0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
-  %b1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
-  %b2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
-  %b3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
-  %b4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
-  %b5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
-  %b6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
-  %b7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
-  %b8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
-  %b9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
-  %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-  %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-  %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-  %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-  %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-  %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-  %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-  %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-  %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-  %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-  %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-  %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-  %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-  %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-  %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-  %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-  %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-  %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-  %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-  %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-  %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-  %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+  %a0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+  %a1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+  %a2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+  %a3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+  %a4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+  %a5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+  %a6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+  %a7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+  %a8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+  %a9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+  %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+  %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+  %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+  %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+  %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+  %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+  %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+  %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+  %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+  %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+  %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+  %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+  %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+  %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+  %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+  %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+  %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+  %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+  %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+  %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+  %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+  %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+  %b0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+  %b1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+  %b2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+  %b3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+  %b4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+  %b5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+  %b6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+  %b7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+  %b8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+  %b9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+  %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+  %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+  %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+  %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+  %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+  %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+  %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+  %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+  %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+  %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+  %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+  %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+  %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+  %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+  %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+  %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+  %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+  %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+  %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+  %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+  %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+  %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
   %r0  = add i16 %a0 , %b0
   %r1  = add i16 %a1 , %b1
   %r2  = add i16 %a2 , %b2
@@ -391,226 +391,226 @@ define void @add_v32i16() {
   %r29 = add i16 %a29, %b29
   %r30 = add i16 %a30, %b30
   %r31 = add i16 %a31, %b31
-  store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
-  store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
-  store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
-  store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
-  store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
-  store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
-  store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
-  store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
-  store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
-  store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
-  store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-  store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-  store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-  store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-  store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-  store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-  store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-  store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-  store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-  store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-  store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-  store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-  store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-  store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-  store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-  store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-  store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-  store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-  store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-  store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-  store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-  store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+  store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+  store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+  store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+  store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+  store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+  store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+  store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+  store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+  store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+  store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+  store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+  store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+  store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+  store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+  store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+  store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+  store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+  store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+  store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+  store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+  store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+  store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+  store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+  store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+  store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+  store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+  store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+  store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+  store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+  store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+  store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+  store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
   ret void
 }
 
 define void @add_v64i8() {
 ; SSE-LABEL: @add_v64i8(
-; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = add <16 x i8> [[TMP1]], [[TMP2]]
-; SSE-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
 ; SSE-NEXT:    [[TMP6:%.*]] = add <16 x i8> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; SSE-NEXT:    [[TMP9:%.*]] = add <16 x i8> [[TMP7]], [[TMP8]]
-; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
 ; SSE-NEXT:    [[TMP12:%.*]] = add <16 x i8> [[TMP10]], [[TMP11]]
-; SSE-NEXT:    store <16 x i8> [[TMP9]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @add_v64i8(
-; SLM-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
+; SLM-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = add <16 x i8> [[TMP1]], [[TMP2]]
-; SLM-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SLM-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SLM-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
 ; SLM-NEXT:    [[TMP6:%.*]] = add <16 x i8> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SLM-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SLM-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; SLM-NEXT:    [[TMP9:%.*]] = add <16 x i8> [[TMP7]], [[TMP8]]
-; SLM-NEXT:    [[TMP10:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
+; SLM-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SLM-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
 ; SLM-NEXT:    [[TMP12:%.*]] = add <16 x i8> [[TMP10]], [[TMP11]]
-; SLM-NEXT:    store <16 x i8> [[TMP9]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SLM-NEXT:    store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @add_v64i8(
-; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @a8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @b8 to <32 x i8>*), align 1
+; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = add <32 x i8> [[TMP1]], [[TMP2]]
-; AVX-NEXT:    store <32 x i8> [[TMP3]], <32 x i8>* bitcast ([64 x i8]* @c8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP3]], ptr @c8, align 1
+; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; AVX-NEXT:    [[TMP6:%.*]] = add <32 x i8> [[TMP4]], [[TMP5]]
-; AVX-NEXT:    store <32 x i8> [[TMP6]], <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @add_v64i8(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @a8 to <64 x i8>*), align 1
-; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @b8 to <64 x i8>*), align 1
+; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
+; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
 ; AVX512-NEXT:    [[TMP3:%.*]] = add <64 x i8> [[TMP1]], [[TMP2]]
-; AVX512-NEXT:    store <64 x i8> [[TMP3]], <64 x i8>* bitcast ([64 x i8]* @c8 to <64 x i8>*), align 1
+; AVX512-NEXT:    store <64 x i8> [[TMP3]], ptr @c8, align 1
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
-  %a1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
-  %a2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
-  %a3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
-  %a4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
-  %a5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
-  %a6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
-  %a7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
-  %a8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
-  %a9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
-  %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-  %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-  %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-  %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-  %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-  %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-  %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-  %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-  %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-  %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-  %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-  %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-  %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-  %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-  %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-  %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-  %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-  %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-  %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-  %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-  %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-  %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-  %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-  %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-  %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-  %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-  %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-  %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-  %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-  %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-  %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-  %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-  %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-  %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-  %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-  %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-  %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-  %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-  %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-  %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-  %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-  %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-  %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-  %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-  %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-  %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-  %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-  %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-  %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-  %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-  %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-  %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-  %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-  %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-  %b0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
-  %b1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
-  %b2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
-  %b3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
-  %b4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
-  %b5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
-  %b6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
-  %b7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
-  %b8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
-  %b9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
-  %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-  %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-  %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-  %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-  %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-  %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-  %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-  %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-  %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-  %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-  %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-  %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-  %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-  %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-  %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-  %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-  %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-  %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-  %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-  %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-  %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-  %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-  %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-  %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-  %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-  %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-  %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-  %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-  %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-  %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-  %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-  %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-  %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-  %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-  %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-  %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-  %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-  %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-  %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-  %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-  %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-  %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-  %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-  %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-  %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-  %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-  %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-  %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-  %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-  %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-  %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-  %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-  %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-  %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+  %a0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+  %a1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+  %a2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+  %a3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+  %a4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+  %a5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+  %a6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+  %a7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+  %a8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+  %a9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+  %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+  %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+  %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+  %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+  %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+  %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+  %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+  %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+  %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+  %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+  %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+  %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+  %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+  %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+  %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+  %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+  %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+  %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+  %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+  %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+  %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+  %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+  %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+  %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+  %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+  %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+  %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+  %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+  %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+  %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+  %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+  %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+  %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+  %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+  %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+  %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+  %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+  %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+  %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+  %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+  %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+  %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+  %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+  %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+  %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+  %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+  %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+  %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+  %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+  %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+  %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+  %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+  %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+  %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+  %b0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+  %b1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+  %b2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+  %b3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+  %b4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+  %b5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+  %b6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+  %b7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+  %b8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+  %b9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+  %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+  %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+  %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+  %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+  %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+  %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+  %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+  %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+  %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+  %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+  %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+  %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+  %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+  %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+  %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+  %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+  %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+  %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+  %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+  %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+  %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+  %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+  %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+  %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+  %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+  %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+  %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+  %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+  %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+  %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+  %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+  %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+  %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+  %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+  %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+  %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+  %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+  %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+  %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+  %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+  %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+  %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+  %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+  %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+  %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+  %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+  %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+  %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+  %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+  %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+  %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+  %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+  %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+  %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
   %r0  = add i8 %a0 , %b0
   %r1  = add i8 %a1 , %b1
   %r2  = add i8 %a2 , %b2
@@ -675,69 +675,69 @@ define void @add_v64i8() {
   %r61 = add i8 %a61, %b61
   %r62 = add i8 %a62, %b62
   %r63 = add i8 %a63, %b63
-  store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
-  store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
-  store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
-  store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
-  store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
-  store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
-  store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
-  store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
-  store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
-  store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
-  store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-  store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-  store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-  store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-  store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-  store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-  store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-  store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-  store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-  store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-  store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-  store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-  store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-  store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-  store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-  store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-  store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-  store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-  store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-  store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-  store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-  store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-  store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-  store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-  store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-  store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-  store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-  store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-  store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-  store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-  store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-  store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-  store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-  store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-  store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-  store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-  store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-  store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-  store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-  store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-  store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-  store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-  store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-  store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-  store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-  store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-  store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-  store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-  store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-  store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-  store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-  store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-  store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-  store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+  store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+  store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+  store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+  store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+  store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+  store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+  store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+  store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+  store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+  store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+  store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+  store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+  store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+  store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+  store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+  store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+  store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+  store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+  store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+  store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+  store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+  store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+  store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+  store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+  store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+  store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+  store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+  store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+  store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+  store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+  store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+  store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+  store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+  store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+  store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+  store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+  store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+  store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+  store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+  store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+  store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+  store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+  store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+  store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+  store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+  store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+  store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+  store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+  store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+  store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+  store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+  store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+  store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+  store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+  store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+  store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+  store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+  store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+  store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+  store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+  store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+  store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+  store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+  store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll
index fbe946d0a5ebb..33fd3e6dc0e09 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll
@@ -24,66 +24,66 @@
 
 define void @sdiv_v16i32_uniformconst() {
 ; SSE-LABEL: @sdiv_v16i32_uniformconst(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
 ; SSE-NEXT:    [[TMP2:%.*]] = sdiv <4 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5>
-; SSE-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP2]], ptr @c32, align 4
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = sdiv <4 x i32> [[TMP3]], <i32 5, i32 5, i32 5, i32 5>
-; SSE-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = sdiv <4 x i32> [[TMP5]], <i32 5, i32 5, i32 5, i32 5>
-; SSE-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP8:%.*]] = sdiv <4 x i32> [[TMP7]], <i32 5, i32 5, i32 5, i32 5>
-; SSE-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @sdiv_v16i32_uniformconst(
-; SLM-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
+; SLM-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
 ; SLM-NEXT:    [[TMP2:%.*]] = sdiv <4 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5>
-; SLM-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP2]], ptr @c32, align 4
+; SLM-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
 ; SLM-NEXT:    [[TMP4:%.*]] = sdiv <4 x i32> [[TMP3]], <i32 5, i32 5, i32 5, i32 5>
-; SLM-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SLM-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
 ; SLM-NEXT:    [[TMP6:%.*]] = sdiv <4 x i32> [[TMP5]], <i32 5, i32 5, i32 5, i32 5>
-; SLM-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SLM-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
 ; SLM-NEXT:    [[TMP8:%.*]] = sdiv <4 x i32> [[TMP7]], <i32 5, i32 5, i32 5, i32 5>
-; SLM-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @sdiv_v16i32_uniformconst(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = sdiv <8 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; AVX-NEXT:    store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP2]], ptr @c32, align 4
+; AVX-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
 ; AVX-NEXT:    [[TMP4:%.*]] = sdiv <8 x i32> [[TMP3]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; AVX-NEXT:    store <8 x i32> [[TMP4]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sdiv_v16i32_uniformconst(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
 ; AVX512-NEXT:    [[TMP2:%.*]] = sdiv <16 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; AVX512-NEXT:    store <16 x i32> [[TMP2]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4
+; AVX512-NEXT:    store <16 x i32> [[TMP2]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
   %r0  = sdiv i32 %a0 , 5
   %r1  = sdiv i32 %a1 , 5
   %r2  = sdiv i32 %a2 , 5
@@ -100,87 +100,87 @@ define void @sdiv_v16i32_uniformconst() {
   %r13 = sdiv i32 %a13, 5
   %r14 = sdiv i32 %a14, 5
   %r15 = sdiv i32 %a15, 5
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @srem_v16i32_uniformconst() {
 ; SSE-LABEL: @srem_v16i32_uniformconst(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
 ; SSE-NEXT:    [[TMP2:%.*]] = srem <4 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5>
-; SSE-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP2]], ptr @c32, align 4
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = srem <4 x i32> [[TMP3]], <i32 5, i32 5, i32 5, i32 5>
-; SSE-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = srem <4 x i32> [[TMP5]], <i32 5, i32 5, i32 5, i32 5>
-; SSE-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP8:%.*]] = srem <4 x i32> [[TMP7]], <i32 5, i32 5, i32 5, i32 5>
-; SSE-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @srem_v16i32_uniformconst(
-; SLM-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
+; SLM-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
 ; SLM-NEXT:    [[TMP2:%.*]] = srem <4 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5>
-; SLM-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP2]], ptr @c32, align 4
+; SLM-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
 ; SLM-NEXT:    [[TMP4:%.*]] = srem <4 x i32> [[TMP3]], <i32 5, i32 5, i32 5, i32 5>
-; SLM-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SLM-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
 ; SLM-NEXT:    [[TMP6:%.*]] = srem <4 x i32> [[TMP5]], <i32 5, i32 5, i32 5, i32 5>
-; SLM-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SLM-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
 ; SLM-NEXT:    [[TMP8:%.*]] = srem <4 x i32> [[TMP7]], <i32 5, i32 5, i32 5, i32 5>
-; SLM-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @srem_v16i32_uniformconst(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = srem <8 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; AVX-NEXT:    store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP2]], ptr @c32, align 4
+; AVX-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
 ; AVX-NEXT:    [[TMP4:%.*]] = srem <8 x i32> [[TMP3]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; AVX-NEXT:    store <8 x i32> [[TMP4]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @srem_v16i32_uniformconst(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
 ; AVX512-NEXT:    [[TMP2:%.*]] = srem <16 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; AVX512-NEXT:    store <16 x i32> [[TMP2]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4
+; AVX512-NEXT:    store <16 x i32> [[TMP2]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
   %r0  = srem i32 %a0 , 5
   %r1  = srem i32 %a1 , 5
   %r2  = srem i32 %a2 , 5
@@ -197,87 +197,87 @@ define void @srem_v16i32_uniformconst() {
   %r13 = srem i32 %a13, 5
   %r14 = srem i32 %a14, 5
   %r15 = srem i32 %a15, 5
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @udiv_v16i32_uniformconst() {
 ; SSE-LABEL: @udiv_v16i32_uniformconst(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
 ; SSE-NEXT:    [[TMP2:%.*]] = udiv <4 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5>
-; SSE-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP2]], ptr @c32, align 4
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = udiv <4 x i32> [[TMP3]], <i32 5, i32 5, i32 5, i32 5>
-; SSE-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = udiv <4 x i32> [[TMP5]], <i32 5, i32 5, i32 5, i32 5>
-; SSE-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP8:%.*]] = udiv <4 x i32> [[TMP7]], <i32 5, i32 5, i32 5, i32 5>
-; SSE-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @udiv_v16i32_uniformconst(
-; SLM-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
+; SLM-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
 ; SLM-NEXT:    [[TMP2:%.*]] = udiv <4 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5>
-; SLM-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP2]], ptr @c32, align 4
+; SLM-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
 ; SLM-NEXT:    [[TMP4:%.*]] = udiv <4 x i32> [[TMP3]], <i32 5, i32 5, i32 5, i32 5>
-; SLM-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SLM-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
 ; SLM-NEXT:    [[TMP6:%.*]] = udiv <4 x i32> [[TMP5]], <i32 5, i32 5, i32 5, i32 5>
-; SLM-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SLM-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
 ; SLM-NEXT:    [[TMP8:%.*]] = udiv <4 x i32> [[TMP7]], <i32 5, i32 5, i32 5, i32 5>
-; SLM-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @udiv_v16i32_uniformconst(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = udiv <8 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; AVX-NEXT:    store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP2]], ptr @c32, align 4
+; AVX-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
 ; AVX-NEXT:    [[TMP4:%.*]] = udiv <8 x i32> [[TMP3]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; AVX-NEXT:    store <8 x i32> [[TMP4]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @udiv_v16i32_uniformconst(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
 ; AVX512-NEXT:    [[TMP2:%.*]] = udiv <16 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; AVX512-NEXT:    store <16 x i32> [[TMP2]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4
+; AVX512-NEXT:    store <16 x i32> [[TMP2]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
   %r0  = udiv i32 %a0 , 5
   %r1  = udiv i32 %a1 , 5
   %r2  = udiv i32 %a2 , 5
@@ -294,87 +294,87 @@ define void @udiv_v16i32_uniformconst() {
   %r13 = udiv i32 %a13, 5
   %r14 = udiv i32 %a14, 5
   %r15 = udiv i32 %a15, 5
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @urem_v16i32_uniformconst() {
 ; SSE-LABEL: @urem_v16i32_uniformconst(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
 ; SSE-NEXT:    [[TMP2:%.*]] = urem <4 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5>
-; SSE-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP2]], ptr @c32, align 4
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = urem <4 x i32> [[TMP3]], <i32 5, i32 5, i32 5, i32 5>
-; SSE-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = urem <4 x i32> [[TMP5]], <i32 5, i32 5, i32 5, i32 5>
-; SSE-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP8:%.*]] = urem <4 x i32> [[TMP7]], <i32 5, i32 5, i32 5, i32 5>
-; SSE-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @urem_v16i32_uniformconst(
-; SLM-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
+; SLM-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
 ; SLM-NEXT:    [[TMP2:%.*]] = urem <4 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5>
-; SLM-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP2]], ptr @c32, align 4
+; SLM-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
 ; SLM-NEXT:    [[TMP4:%.*]] = urem <4 x i32> [[TMP3]], <i32 5, i32 5, i32 5, i32 5>
-; SLM-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SLM-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
 ; SLM-NEXT:    [[TMP6:%.*]] = urem <4 x i32> [[TMP5]], <i32 5, i32 5, i32 5, i32 5>
-; SLM-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SLM-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
 ; SLM-NEXT:    [[TMP8:%.*]] = urem <4 x i32> [[TMP7]], <i32 5, i32 5, i32 5, i32 5>
-; SLM-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @urem_v16i32_uniformconst(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = urem <8 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; AVX-NEXT:    store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP2]], ptr @c32, align 4
+; AVX-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
 ; AVX-NEXT:    [[TMP4:%.*]] = urem <8 x i32> [[TMP3]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; AVX-NEXT:    store <8 x i32> [[TMP4]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @urem_v16i32_uniformconst(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
 ; AVX512-NEXT:    [[TMP2:%.*]] = urem <16 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; AVX512-NEXT:    store <16 x i32> [[TMP2]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4
+; AVX512-NEXT:    store <16 x i32> [[TMP2]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
   %r0  = urem i32 %a0 , 5
   %r1  = urem i32 %a1 , 5
   %r2  = urem i32 %a2 , 5
@@ -391,21 +391,21 @@ define void @urem_v16i32_uniformconst() {
   %r13 = urem i32 %a13, 5
   %r14 = urem i32 %a14, 5
   %r15 = urem i32 %a15, 5
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }

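For context, the conversion applied throughout these hunks is mechanical: with opaque pointers, the constant-expression bitcasts that reinterpreted typed global pointers as vector pointers become unnecessary, and a getelementptr whose indices are all zero folds to the base pointer itself. A minimal before/after sketch, using a hypothetical global @g rather than any of the globals in these tests:

@g = common global [16 x i32] zeroinitializer, align 4

; Typed pointers (before): a bitcast constant expression reinterprets
; the [16 x i32]* global as a <4 x i32>* before the vector load.
%v0 = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @g to <4 x i32>*), align 4

; Opaque pointers (after): @g is already a ptr, so the load uses it
; directly and the bitcast disappears. Non-zero offsets keep an explicit
; getelementptr, e.g. ptr getelementptr inbounds ([16 x i32], ptr @g, i32 0, i64 4).
%v0 = load <4 x i32>, ptr @g, align 4
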
diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fix.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fix.ll
index c9a582416bab4..e4c76daddb02e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fix.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fix.ll
@@ -27,107 +27,107 @@ declare i8  @llvm.smul.fix.i8 (i8 , i8 , i32)
 
 define void @smul_v8i64() {
 ; SSE-LABEL: @smul_v8i64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
+; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
 ; SSE-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]], i32 3)
-; SSE-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP3]], ptr @c64, align 8
+; SSE-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP6:%.*]] = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], i32 3)
-; SSE-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; SSE-NEXT:    [[TMP9:%.*]] = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i32 3)
-; SSE-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP11:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
 ; SSE-NEXT:    [[TMP12:%.*]] = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i32 3)
-; SSE-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @smul_v8i64(
-; SLM-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
+; SLM-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
+; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
 ; SLM-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]], i32 3)
-; SLM-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
+; SLM-NEXT:    store <2 x i64> [[TMP3]], ptr @c64, align 8
+; SLM-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SLM-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
 ; SLM-NEXT:    [[TMP6:%.*]] = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], i32 3)
-; SLM-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
+; SLM-NEXT:    store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SLM-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SLM-NEXT:    [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; SLM-NEXT:    [[TMP9:%.*]] = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i32 3)
-; SLM-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP11:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
+; SLM-NEXT:    store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SLM-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SLM-NEXT:    [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
 ; SLM-NEXT:    [[TMP12:%.*]] = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i32 3)
-; SLM-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
+; SLM-NEXT:    store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
 ; SLM-NEXT:    ret void
 ;
 ; AVX1-LABEL: @smul_v8i64(
-; AVX1-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
+; AVX1-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
+; AVX1-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
 ; AVX1-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]], i32 3)
-; AVX1-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
+; AVX1-NEXT:    store <2 x i64> [[TMP3]], ptr @c64, align 8
+; AVX1-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; AVX1-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
 ; AVX1-NEXT:    [[TMP6:%.*]] = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], i32 3)
-; AVX1-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
+; AVX1-NEXT:    store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; AVX1-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX1-NEXT:    [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX1-NEXT:    [[TMP9:%.*]] = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i32 3)
-; AVX1-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP11:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
+; AVX1-NEXT:    store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; AVX1-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; AVX1-NEXT:    [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
 ; AVX1-NEXT:    [[TMP12:%.*]] = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i32 3)
-; AVX1-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
+; AVX1-NEXT:    store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @smul_v8i64(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
-; AVX2-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; AVX2-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
 ; AVX2-NEXT:    [[TMP3:%.*]] = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]], i32 3)
-; AVX2-NEXT:    store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
-; AVX2-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
-; AVX2-NEXT:    [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX2-NEXT:    store <4 x i64> [[TMP3]], ptr @c64, align 8
+; AVX2-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX2-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX2-NEXT:    [[TMP6:%.*]] = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]], i32 3)
-; AVX2-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX2-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @smul_v8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
+; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]], i32 3)
-; AVX512-NEXT:    store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
+; AVX512-NEXT:    store <8 x i64> [[TMP3]], ptr @c64, align 8
 ; AVX512-NEXT:    ret void
 ;
 ; AVX256BW-LABEL: @smul_v8i64(
-; AVX256BW-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
-; AVX256BW-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; AVX256BW-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; AVX256BW-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
 ; AVX256BW-NEXT:    [[TMP3:%.*]] = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]], i32 3)
-; AVX256BW-NEXT:    store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
-; AVX256BW-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
-; AVX256BW-NEXT:    [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX256BW-NEXT:    store <4 x i64> [[TMP3]], ptr @c64, align 8
+; AVX256BW-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX256BW-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX256BW-NEXT:    [[TMP6:%.*]] = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]], i32 3)
-; AVX256BW-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX256BW-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
 ; AVX256BW-NEXT:    ret void
 ;
-  %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-  %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-  %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-  %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-  %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-  %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-  %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-  %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-  %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-  %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-  %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-  %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-  %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-  %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-  %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-  %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+  %a0 = load i64, ptr @a64, align 8
+  %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+  %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+  %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+  %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+  %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+  %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+  %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+  %b0 = load i64, ptr @b64, align 8
+  %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+  %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+  %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+  %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+  %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+  %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+  %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
   %r0 = call i64 @llvm.smul.fix.i64(i64 %a0, i64 %b0, i32 3)
   %r1 = call i64 @llvm.smul.fix.i64(i64 %a1, i64 %b1, i32 3)
   %r2 = call i64 @llvm.smul.fix.i64(i64 %a2, i64 %b2, i32 3)
@@ -136,70 +136,70 @@ define void @smul_v8i64() {
   %r5 = call i64 @llvm.smul.fix.i64(i64 %a5, i64 %b5, i32 3)
   %r6 = call i64 @llvm.smul.fix.i64(i64 %a6, i64 %b6, i32 3)
   %r7 = call i64 @llvm.smul.fix.i64(i64 %a7, i64 %b7, i32 3)
-  store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-  store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-  store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-  store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-  store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-  store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-  store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-  store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+  store i64 %r0, ptr @c64, align 8
+  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @smul_v16i32() {
 ; SSE-LABEL: @smul_v16i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], i32 3)
-; SSE-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP3]], ptr @c32, align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]], i32 3)
-; SSE-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP9:%.*]] = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i32 3)
-; SSE-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP12:%.*]] = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i32 3)
-; SSE-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @smul_v16i32(
-; SLM-NEXT:    [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4
-; SLM-NEXT:    [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4
-; SLM-NEXT:    [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4
-; SLM-NEXT:    [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4
-; SLM-NEXT:    [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4
-; SLM-NEXT:    [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4
-; SLM-NEXT:    [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4
-; SLM-NEXT:    [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4
-; SLM-NEXT:    [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4
-; SLM-NEXT:    [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4
-; SLM-NEXT:    [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-; SLM-NEXT:    [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-; SLM-NEXT:    [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-; SLM-NEXT:    [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-; SLM-NEXT:    [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-; SLM-NEXT:    [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-; SLM-NEXT:    [[B0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0), align 4
-; SLM-NEXT:    [[B1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1), align 4
-; SLM-NEXT:    [[B2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2), align 4
-; SLM-NEXT:    [[B3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3), align 4
-; SLM-NEXT:    [[B4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4), align 4
-; SLM-NEXT:    [[B5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5), align 4
-; SLM-NEXT:    [[B6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6), align 4
-; SLM-NEXT:    [[B7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7), align 4
-; SLM-NEXT:    [[B8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8), align 4
-; SLM-NEXT:    [[B9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9), align 4
-; SLM-NEXT:    [[B10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-; SLM-NEXT:    [[B11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-; SLM-NEXT:    [[B12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-; SLM-NEXT:    [[B13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-; SLM-NEXT:    [[B14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-; SLM-NEXT:    [[B15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+; SLM-NEXT:    [[A0:%.*]] = load i32, ptr @a32, align 4
+; SLM-NEXT:    [[A1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1), align 4
+; SLM-NEXT:    [[A2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2), align 4
+; SLM-NEXT:    [[A3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3), align 4
+; SLM-NEXT:    [[A4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SLM-NEXT:    [[A5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5), align 4
+; SLM-NEXT:    [[A6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6), align 4
+; SLM-NEXT:    [[A7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7), align 4
+; SLM-NEXT:    [[A8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SLM-NEXT:    [[A9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9), align 4
+; SLM-NEXT:    [[A10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+; SLM-NEXT:    [[A11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+; SLM-NEXT:    [[A12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SLM-NEXT:    [[A13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+; SLM-NEXT:    [[A14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+; SLM-NEXT:    [[A15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+; SLM-NEXT:    [[B0:%.*]] = load i32, ptr @b32, align 4
+; SLM-NEXT:    [[B1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1), align 4
+; SLM-NEXT:    [[B2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2), align 4
+; SLM-NEXT:    [[B3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3), align 4
+; SLM-NEXT:    [[B4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
+; SLM-NEXT:    [[B5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5), align 4
+; SLM-NEXT:    [[B6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6), align 4
+; SLM-NEXT:    [[B7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7), align 4
+; SLM-NEXT:    [[B8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
+; SLM-NEXT:    [[B9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9), align 4
+; SLM-NEXT:    [[B10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+; SLM-NEXT:    [[B11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+; SLM-NEXT:    [[B12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+; SLM-NEXT:    [[B13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+; SLM-NEXT:    [[B14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+; SLM-NEXT:    [[B15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
 ; SLM-NEXT:    [[R0:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A0]], i32 [[B0]], i32 3)
 ; SLM-NEXT:    [[R1:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A1]], i32 [[B1]], i32 3)
 ; SLM-NEXT:    [[R2:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A2]], i32 [[B2]], i32 3)
@@ -216,74 +216,74 @@ define void @smul_v16i32() {
 ; SLM-NEXT:    [[R13:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A13]], i32 [[B13]], i32 3)
 ; SLM-NEXT:    [[R14:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A14]], i32 [[B14]], i32 3)
 ; SLM-NEXT:    [[R15:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A15]], i32 [[B15]], i32 3)
-; SLM-NEXT:    store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4
-; SLM-NEXT:    store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4
-; SLM-NEXT:    store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4
-; SLM-NEXT:    store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4
-; SLM-NEXT:    store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4
-; SLM-NEXT:    store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4
-; SLM-NEXT:    store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4
-; SLM-NEXT:    store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4
-; SLM-NEXT:    store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4
-; SLM-NEXT:    store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4
-; SLM-NEXT:    store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-; SLM-NEXT:    store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-; SLM-NEXT:    store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-; SLM-NEXT:    store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-; SLM-NEXT:    store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-; SLM-NEXT:    store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+; SLM-NEXT:    store i32 [[R0]], ptr @c32, align 4
+; SLM-NEXT:    store i32 [[R1]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1), align 4
+; SLM-NEXT:    store i32 [[R2]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2), align 4
+; SLM-NEXT:    store i32 [[R3]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3), align 4
+; SLM-NEXT:    store i32 [[R4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SLM-NEXT:    store i32 [[R5]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5), align 4
+; SLM-NEXT:    store i32 [[R6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6), align 4
+; SLM-NEXT:    store i32 [[R7]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7), align 4
+; SLM-NEXT:    store i32 [[R8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SLM-NEXT:    store i32 [[R9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9), align 4
+; SLM-NEXT:    store i32 [[R10]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+; SLM-NEXT:    store i32 [[R11]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+; SLM-NEXT:    store i32 [[R12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+; SLM-NEXT:    store i32 [[R13]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+; SLM-NEXT:    store i32 [[R14]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+; SLM-NEXT:    store i32 [[R15]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @smul_v16i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
 ; AVX-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]], i32 3)
-; AVX-NEXT:    store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP3]], ptr @c32, align 4
+; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; AVX-NEXT:    [[TMP6:%.*]] = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> [[TMP4]], <8 x i32> [[TMP5]], i32 3)
-; AVX-NEXT:    store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @smul_v16i32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @b32 to <16 x i32>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]], i32 3)
-; AVX512-NEXT:    store <16 x i32> [[TMP3]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4
+; AVX512-NEXT:    store <16 x i32> [[TMP3]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-  %b0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
-  %b1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
-  %b2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
-  %b3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
-  %b4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
-  %b5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
-  %b6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
-  %b7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
-  %b8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
-  %b9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
-  %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-  %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-  %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-  %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-  %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-  %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+  %b0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+  %b1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+  %b2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+  %b3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+  %b4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+  %b5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+  %b6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+  %b7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+  %b8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+  %b9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+  %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+  %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+  %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+  %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+  %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+  %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
   %r0  = call i32 @llvm.smul.fix.i32(i32 %a0 , i32 %b0 , i32 3)
   %r1  = call i32 @llvm.smul.fix.i32(i32 %a1 , i32 %b1 , i32 3)
   %r2  = call i32 @llvm.smul.fix.i32(i32 %a2 , i32 %b2 , i32 3)
@@ -300,146 +300,146 @@ define void @smul_v16i32() {
   %r13 = call i32 @llvm.smul.fix.i32(i32 %a13, i32 %b13, i32 3)
   %r14 = call i32 @llvm.smul.fix.i32(i32 %a14, i32 %b14, i32 3)
   %r15 = call i32 @llvm.smul.fix.i32(i32 %a15, i32 %b15, i32 3)
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @smul_v32i16() {
 ; SSE-LABEL: @smul_v32i16(
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
 ; SSE-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]], i32 3)
-; SSE-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
 ; SSE-NEXT:    [[TMP6:%.*]] = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]], i32 3)
-; SSE-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; SSE-NEXT:    [[TMP9:%.*]] = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i32 3)
-; SSE-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
 ; SSE-NEXT:    [[TMP12:%.*]] = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i32 3)
-; SSE-NEXT:    store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @smul_v32i16(
-; SLM-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SLM-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
 ; SLM-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]], i32 3)
-; SLM-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SLM-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SLM-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
 ; SLM-NEXT:    [[TMP6:%.*]] = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]], i32 3)
-; SLM-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SLM-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SLM-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; SLM-NEXT:    [[TMP9:%.*]] = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i32 3)
-; SLM-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SLM-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SLM-NEXT:    [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
 ; SLM-NEXT:    [[TMP12:%.*]] = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i32 3)
-; SLM-NEXT:    store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @smul_v32i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
+; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
 ; AVX-NEXT:    [[TMP3:%.*]] = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]], i32 3)
-; AVX-NEXT:    store <16 x i16> [[TMP3]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP3]], ptr @c16, align 2
+; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; AVX-NEXT:    [[TMP6:%.*]] = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> [[TMP4]], <16 x i16> [[TMP5]], i32 3)
-; AVX-NEXT:    store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @smul_v32i16(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @a16 to <32 x i16>*), align 2
-; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @b16 to <32 x i16>*), align 2
+; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
+; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]], i32 3)
-; AVX512-NEXT:    store <32 x i16> [[TMP3]], <32 x i16>* bitcast ([32 x i16]* @c16 to <32 x i16>*), align 2
+; AVX512-NEXT:    store <32 x i16> [[TMP3]], ptr @c16, align 2
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
-  %a1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
-  %a2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
-  %a3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
-  %a4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
-  %a5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
-  %a6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
-  %a7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
-  %a8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
-  %a9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
-  %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-  %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-  %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-  %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-  %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-  %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-  %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-  %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-  %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-  %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-  %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-  %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-  %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-  %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-  %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-  %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-  %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-  %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-  %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-  %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-  %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-  %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-  %b0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
-  %b1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
-  %b2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
-  %b3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
-  %b4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
-  %b5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
-  %b6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
-  %b7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
-  %b8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
-  %b9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
-  %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-  %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-  %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-  %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-  %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-  %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-  %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-  %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-  %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-  %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-  %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-  %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-  %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-  %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-  %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-  %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-  %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-  %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-  %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-  %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-  %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-  %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+  %a0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+  %a1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+  %a2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+  %a3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+  %a4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+  %a5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+  %a6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+  %a7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+  %a8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+  %a9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+  %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+  %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+  %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+  %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+  %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+  %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+  %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+  %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+  %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+  %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+  %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+  %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+  %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+  %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+  %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+  %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+  %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+  %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+  %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+  %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+  %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+  %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+  %b0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+  %b1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+  %b2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+  %b3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+  %b4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+  %b5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+  %b6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+  %b7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+  %b8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+  %b9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+  %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+  %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+  %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+  %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+  %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+  %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+  %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+  %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+  %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+  %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+  %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+  %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+  %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+  %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+  %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+  %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+  %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+  %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+  %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+  %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+  %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+  %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
   %r0  = call i16 @llvm.smul.fix.i16(i16 %a0 , i16 %b0 , i32 3)
   %r1  = call i16 @llvm.smul.fix.i16(i16 %a1 , i16 %b1 , i32 3)
   %r2  = call i16 @llvm.smul.fix.i16(i16 %a2 , i16 %b2 , i32 3)
@@ -472,226 +472,226 @@ define void @smul_v32i16() {
   %r29 = call i16 @llvm.smul.fix.i16(i16 %a29, i16 %b29, i32 3)
   %r30 = call i16 @llvm.smul.fix.i16(i16 %a30, i16 %b30, i32 3)
   %r31 = call i16 @llvm.smul.fix.i16(i16 %a31, i16 %b31, i32 3)
-  store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
-  store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
-  store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
-  store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
-  store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
-  store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
-  store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
-  store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
-  store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
-  store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
-  store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-  store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-  store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-  store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-  store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-  store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-  store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-  store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-  store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-  store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-  store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-  store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-  store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-  store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-  store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-  store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-  store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-  store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-  store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-  store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-  store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-  store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+  store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+  store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+  store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+  store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+  store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+  store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+  store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+  store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+  store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+  store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+  store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+  store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+  store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+  store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+  store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+  store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+  store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+  store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+  store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+  store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+  store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+  store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+  store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+  store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+  store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+  store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+  store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+  store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+  store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+  store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+  store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+  store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
   ret void
 }
 
 define void @smul_v64i8() {
 ; SSE-LABEL: @smul_v64i8(
-; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], i32 3)
-; SSE-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
 ; SSE-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 3)
-; SSE-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; SSE-NEXT:    [[TMP9:%.*]] = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]], i32 3)
-; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
 ; SSE-NEXT:    [[TMP12:%.*]] = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]], i32 3)
-; SSE-NEXT:    store <16 x i8> [[TMP9]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @smul_v64i8(
-; SLM-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
+; SLM-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], i32 3)
-; SLM-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SLM-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SLM-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
 ; SLM-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 3)
-; SLM-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SLM-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SLM-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; SLM-NEXT:    [[TMP9:%.*]] = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]], i32 3)
-; SLM-NEXT:    [[TMP10:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
+; SLM-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SLM-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
 ; SLM-NEXT:    [[TMP12:%.*]] = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]], i32 3)
-; SLM-NEXT:    store <16 x i8> [[TMP9]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SLM-NEXT:    store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @smul_v64i8(
-; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @a8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @b8 to <32 x i8>*), align 1
+; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> [[TMP1]], <32 x i8> [[TMP2]], i32 3)
-; AVX-NEXT:    store <32 x i8> [[TMP3]], <32 x i8>* bitcast ([64 x i8]* @c8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP3]], ptr @c8, align 1
+; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; AVX-NEXT:    [[TMP6:%.*]] = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> [[TMP4]], <32 x i8> [[TMP5]], i32 3)
-; AVX-NEXT:    store <32 x i8> [[TMP6]], <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @smul_v64i8(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @a8 to <64 x i8>*), align 1
-; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @b8 to <64 x i8>*), align 1
+; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
+; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> [[TMP1]], <64 x i8> [[TMP2]], i32 3)
-; AVX512-NEXT:    store <64 x i8> [[TMP3]], <64 x i8>* bitcast ([64 x i8]* @c8 to <64 x i8>*), align 1
+; AVX512-NEXT:    store <64 x i8> [[TMP3]], ptr @c8, align 1
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
-  %a1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
-  %a2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
-  %a3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
-  %a4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
-  %a5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
-  %a6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
-  %a7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
-  %a8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
-  %a9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
-  %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-  %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-  %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-  %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-  %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-  %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-  %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-  %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-  %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-  %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-  %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-  %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-  %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-  %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-  %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-  %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-  %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-  %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-  %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-  %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-  %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-  %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-  %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-  %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-  %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-  %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-  %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-  %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-  %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-  %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-  %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-  %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-  %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-  %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-  %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-  %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-  %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-  %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-  %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-  %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-  %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-  %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-  %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-  %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-  %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-  %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-  %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-  %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-  %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-  %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-  %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-  %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-  %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-  %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-  %b0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
-  %b1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
-  %b2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
-  %b3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
-  %b4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
-  %b5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
-  %b6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
-  %b7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
-  %b8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
-  %b9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
-  %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-  %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-  %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-  %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-  %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-  %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-  %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-  %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-  %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-  %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-  %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-  %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-  %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-  %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-  %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-  %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-  %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-  %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-  %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-  %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-  %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-  %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-  %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-  %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-  %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-  %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-  %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-  %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-  %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-  %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-  %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-  %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-  %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-  %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-  %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-  %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-  %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-  %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-  %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-  %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-  %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-  %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-  %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-  %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-  %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-  %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-  %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-  %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-  %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-  %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-  %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-  %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-  %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-  %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+  %a0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+  %a1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+  %a2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+  %a3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+  %a4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+  %a5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+  %a6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+  %a7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+  %a8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+  %a9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+  %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+  %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+  %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+  %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+  %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+  %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+  %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+  %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+  %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+  %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+  %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+  %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+  %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+  %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+  %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+  %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+  %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+  %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+  %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+  %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+  %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+  %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+  %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+  %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+  %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+  %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+  %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+  %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+  %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+  %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+  %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+  %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+  %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+  %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+  %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+  %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+  %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+  %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+  %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+  %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+  %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+  %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+  %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+  %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+  %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+  %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+  %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+  %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+  %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+  %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+  %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+  %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+  %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+  %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+  %b0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+  %b1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+  %b2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+  %b3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+  %b4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+  %b5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+  %b6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+  %b7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+  %b8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+  %b9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+  %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+  %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+  %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+  %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+  %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+  %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+  %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+  %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+  %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+  %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+  %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+  %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+  %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+  %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+  %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+  %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+  %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+  %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+  %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+  %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+  %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+  %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+  %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+  %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+  %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+  %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+  %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+  %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+  %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+  %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+  %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+  %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+  %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+  %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+  %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+  %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+  %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+  %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+  %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+  %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+  %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+  %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+  %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+  %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+  %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+  %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+  %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+  %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+  %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+  %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+  %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+  %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+  %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+  %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
   %r0  = call i8 @llvm.smul.fix.i8(i8 %a0 , i8 %b0 , i32 3)
   %r1  = call i8 @llvm.smul.fix.i8(i8 %a1 , i8 %b1 , i32 3)
   %r2  = call i8 @llvm.smul.fix.i8(i8 %a2 , i8 %b2 , i32 3)
@@ -756,70 +756,70 @@ define void @smul_v64i8() {
   %r61 = call i8 @llvm.smul.fix.i8(i8 %a61, i8 %b61, i32 3)
   %r62 = call i8 @llvm.smul.fix.i8(i8 %a62, i8 %b62, i32 3)
   %r63 = call i8 @llvm.smul.fix.i8(i8 %a63, i8 %b63, i32 3)
-  store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
-  store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
-  store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
-  store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
-  store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
-  store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
-  store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
-  store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
-  store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
-  store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
-  store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-  store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-  store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-  store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-  store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-  store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-  store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-  store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-  store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-  store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-  store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-  store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-  store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-  store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-  store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-  store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-  store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-  store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-  store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-  store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-  store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-  store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-  store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-  store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-  store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-  store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-  store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-  store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-  store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-  store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-  store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-  store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-  store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-  store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-  store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-  store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-  store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-  store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-  store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-  store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-  store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-  store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-  store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-  store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-  store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-  store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-  store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-  store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-  store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-  store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-  store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-  store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-  store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-  store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+  store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+  store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+  store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+  store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+  store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+  store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+  store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+  store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+  store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+  store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+  store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+  store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+  store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+  store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+  store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+  store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+  store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+  store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+  store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+  store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+  store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+  store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+  store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+  store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+  store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+  store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+  store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+  store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+  store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+  store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+  store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+  store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+  store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+  store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+  store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+  store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+  store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+  store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+  store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+  store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+  store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+  store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+  store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+  store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+  store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+  store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+  store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+  store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+  store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+  store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+  store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+  store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+  store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+  store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+  store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+  store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+  store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+  store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+  store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+  store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+  store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+  store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+  store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+  store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
   ret void
 }
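
; Editor's note (illustrative only, not part of the commit): every hunk in
; this diff is the same mechanical typed-pointer to opaque-pointer rewrite.
; As a minimal sketch, using operands taken from the hunks above (%v is a
; placeholder name), a load through a bitcast of a typed global pointer
;
;   %v = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
;
; becomes a direct load through the opaque pointer
;
;   %v = load <2 x i64>, ptr @a64, align 8
;
; and a bitcast of a constant GEP, e.g.
; bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*),
; drops the bitcast and the pointee types, leaving
; getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2).
; No loads, stores, offsets, or alignments change, which is why the
; conversion is NFC for these tests.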
 
@@ -830,107 +830,107 @@ declare i8  @llvm.umul.fix.i8 (i8 , i8 , i32)
 
 define void @umul_v8i64() {
 ; SSE-LABEL: @umul_v8i64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
+; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
 ; SSE-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]], i32 3)
-; SSE-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP3]], ptr @c64, align 8
+; SSE-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP6:%.*]] = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], i32 3)
-; SSE-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; SSE-NEXT:    [[TMP9:%.*]] = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i32 3)
-; SSE-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP11:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
 ; SSE-NEXT:    [[TMP12:%.*]] = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i32 3)
-; SSE-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @umul_v8i64(
-; SLM-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
+; SLM-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
+; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
 ; SLM-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]], i32 3)
-; SLM-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
+; SLM-NEXT:    store <2 x i64> [[TMP3]], ptr @c64, align 8
+; SLM-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SLM-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
 ; SLM-NEXT:    [[TMP6:%.*]] = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], i32 3)
-; SLM-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
+; SLM-NEXT:    store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SLM-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SLM-NEXT:    [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; SLM-NEXT:    [[TMP9:%.*]] = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i32 3)
-; SLM-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP11:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
+; SLM-NEXT:    store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SLM-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SLM-NEXT:    [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
 ; SLM-NEXT:    [[TMP12:%.*]] = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i32 3)
-; SLM-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
+; SLM-NEXT:    store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
 ; SLM-NEXT:    ret void
 ;
 ; AVX1-LABEL: @umul_v8i64(
-; AVX1-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
+; AVX1-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
+; AVX1-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
 ; AVX1-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]], i32 3)
-; AVX1-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
+; AVX1-NEXT:    store <2 x i64> [[TMP3]], ptr @c64, align 8
+; AVX1-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; AVX1-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
 ; AVX1-NEXT:    [[TMP6:%.*]] = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], i32 3)
-; AVX1-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
+; AVX1-NEXT:    store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; AVX1-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX1-NEXT:    [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX1-NEXT:    [[TMP9:%.*]] = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i32 3)
-; AVX1-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP11:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
+; AVX1-NEXT:    store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; AVX1-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; AVX1-NEXT:    [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
 ; AVX1-NEXT:    [[TMP12:%.*]] = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i32 3)
-; AVX1-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
+; AVX1-NEXT:    store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @umul_v8i64(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
-; AVX2-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; AVX2-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
 ; AVX2-NEXT:    [[TMP3:%.*]] = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]], i32 3)
-; AVX2-NEXT:    store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
-; AVX2-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
-; AVX2-NEXT:    [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX2-NEXT:    store <4 x i64> [[TMP3]], ptr @c64, align 8
+; AVX2-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX2-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX2-NEXT:    [[TMP6:%.*]] = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]], i32 3)
-; AVX2-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX2-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @umul_v8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
+; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]], i32 3)
-; AVX512-NEXT:    store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
+; AVX512-NEXT:    store <8 x i64> [[TMP3]], ptr @c64, align 8
 ; AVX512-NEXT:    ret void
 ;
 ; AVX256BW-LABEL: @umul_v8i64(
-; AVX256BW-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
-; AVX256BW-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; AVX256BW-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; AVX256BW-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
 ; AVX256BW-NEXT:    [[TMP3:%.*]] = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]], i32 3)
-; AVX256BW-NEXT:    store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
-; AVX256BW-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
-; AVX256BW-NEXT:    [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX256BW-NEXT:    store <4 x i64> [[TMP3]], ptr @c64, align 8
+; AVX256BW-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX256BW-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX256BW-NEXT:    [[TMP6:%.*]] = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]], i32 3)
-; AVX256BW-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX256BW-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
 ; AVX256BW-NEXT:    ret void
 ;
-  %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-  %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-  %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-  %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-  %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-  %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-  %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-  %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-  %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-  %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-  %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-  %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-  %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-  %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-  %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-  %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+  %a0 = load i64, ptr @a64, align 8
+  %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+  %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+  %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+  %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+  %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+  %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+  %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+  %b0 = load i64, ptr @b64, align 8
+  %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+  %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+  %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+  %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+  %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+  %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+  %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
   %r0 = call i64 @llvm.umul.fix.i64(i64 %a0, i64 %b0, i32 3)
   %r1 = call i64 @llvm.umul.fix.i64(i64 %a1, i64 %b1, i32 3)
   %r2 = call i64 @llvm.umul.fix.i64(i64 %a2, i64 %b2, i32 3)
@@ -939,70 +939,70 @@ define void @umul_v8i64() {
   %r5 = call i64 @llvm.umul.fix.i64(i64 %a5, i64 %b5, i32 3)
   %r6 = call i64 @llvm.umul.fix.i64(i64 %a6, i64 %b6, i32 3)
   %r7 = call i64 @llvm.umul.fix.i64(i64 %a7, i64 %b7, i32 3)
-  store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-  store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-  store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-  store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-  store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-  store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-  store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-  store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+  store i64 %r0, ptr @c64, align 8
+  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @umul_v16i32() {
 ; SSE-LABEL: @umul_v16i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], i32 3)
-; SSE-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP3]], ptr @c32, align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]], i32 3)
-; SSE-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP9:%.*]] = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i32 3)
-; SSE-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP12:%.*]] = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i32 3)
-; SSE-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @umul_v16i32(
-; SLM-NEXT:    [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4
-; SLM-NEXT:    [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4
-; SLM-NEXT:    [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4
-; SLM-NEXT:    [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4
-; SLM-NEXT:    [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4
-; SLM-NEXT:    [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4
-; SLM-NEXT:    [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4
-; SLM-NEXT:    [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4
-; SLM-NEXT:    [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4
-; SLM-NEXT:    [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4
-; SLM-NEXT:    [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-; SLM-NEXT:    [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-; SLM-NEXT:    [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-; SLM-NEXT:    [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-; SLM-NEXT:    [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-; SLM-NEXT:    [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-; SLM-NEXT:    [[B0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0), align 4
-; SLM-NEXT:    [[B1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1), align 4
-; SLM-NEXT:    [[B2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2), align 4
-; SLM-NEXT:    [[B3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3), align 4
-; SLM-NEXT:    [[B4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4), align 4
-; SLM-NEXT:    [[B5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5), align 4
-; SLM-NEXT:    [[B6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6), align 4
-; SLM-NEXT:    [[B7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7), align 4
-; SLM-NEXT:    [[B8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8), align 4
-; SLM-NEXT:    [[B9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9), align 4
-; SLM-NEXT:    [[B10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-; SLM-NEXT:    [[B11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-; SLM-NEXT:    [[B12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-; SLM-NEXT:    [[B13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-; SLM-NEXT:    [[B14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-; SLM-NEXT:    [[B15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+; SLM-NEXT:    [[A0:%.*]] = load i32, ptr @a32, align 4
+; SLM-NEXT:    [[A1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1), align 4
+; SLM-NEXT:    [[A2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2), align 4
+; SLM-NEXT:    [[A3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3), align 4
+; SLM-NEXT:    [[A4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SLM-NEXT:    [[A5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5), align 4
+; SLM-NEXT:    [[A6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6), align 4
+; SLM-NEXT:    [[A7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7), align 4
+; SLM-NEXT:    [[A8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SLM-NEXT:    [[A9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9), align 4
+; SLM-NEXT:    [[A10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+; SLM-NEXT:    [[A11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+; SLM-NEXT:    [[A12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SLM-NEXT:    [[A13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+; SLM-NEXT:    [[A14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+; SLM-NEXT:    [[A15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+; SLM-NEXT:    [[B0:%.*]] = load i32, ptr @b32, align 4
+; SLM-NEXT:    [[B1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1), align 4
+; SLM-NEXT:    [[B2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2), align 4
+; SLM-NEXT:    [[B3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3), align 4
+; SLM-NEXT:    [[B4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
+; SLM-NEXT:    [[B5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5), align 4
+; SLM-NEXT:    [[B6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6), align 4
+; SLM-NEXT:    [[B7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7), align 4
+; SLM-NEXT:    [[B8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
+; SLM-NEXT:    [[B9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9), align 4
+; SLM-NEXT:    [[B10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+; SLM-NEXT:    [[B11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+; SLM-NEXT:    [[B12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+; SLM-NEXT:    [[B13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+; SLM-NEXT:    [[B14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+; SLM-NEXT:    [[B15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
 ; SLM-NEXT:    [[R0:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A0]], i32 [[B0]], i32 3)
 ; SLM-NEXT:    [[R1:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A1]], i32 [[B1]], i32 3)
 ; SLM-NEXT:    [[R2:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A2]], i32 [[B2]], i32 3)
@@ -1019,74 +1019,74 @@ define void @umul_v16i32() {
 ; SLM-NEXT:    [[R13:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A13]], i32 [[B13]], i32 3)
 ; SLM-NEXT:    [[R14:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A14]], i32 [[B14]], i32 3)
 ; SLM-NEXT:    [[R15:%.*]] = call i32 @llvm.umul.fix.i32(i32 [[A15]], i32 [[B15]], i32 3)
-; SLM-NEXT:    store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4
-; SLM-NEXT:    store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4
-; SLM-NEXT:    store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4
-; SLM-NEXT:    store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4
-; SLM-NEXT:    store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4
-; SLM-NEXT:    store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4
-; SLM-NEXT:    store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4
-; SLM-NEXT:    store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4
-; SLM-NEXT:    store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4
-; SLM-NEXT:    store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4
-; SLM-NEXT:    store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-; SLM-NEXT:    store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-; SLM-NEXT:    store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-; SLM-NEXT:    store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-; SLM-NEXT:    store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-; SLM-NEXT:    store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+; SLM-NEXT:    store i32 [[R0]], ptr @c32, align 4
+; SLM-NEXT:    store i32 [[R1]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1), align 4
+; SLM-NEXT:    store i32 [[R2]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2), align 4
+; SLM-NEXT:    store i32 [[R3]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3), align 4
+; SLM-NEXT:    store i32 [[R4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SLM-NEXT:    store i32 [[R5]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5), align 4
+; SLM-NEXT:    store i32 [[R6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6), align 4
+; SLM-NEXT:    store i32 [[R7]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7), align 4
+; SLM-NEXT:    store i32 [[R8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SLM-NEXT:    store i32 [[R9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9), align 4
+; SLM-NEXT:    store i32 [[R10]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+; SLM-NEXT:    store i32 [[R11]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+; SLM-NEXT:    store i32 [[R12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+; SLM-NEXT:    store i32 [[R13]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+; SLM-NEXT:    store i32 [[R14]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+; SLM-NEXT:    store i32 [[R15]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @umul_v16i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
 ; AVX-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]], i32 3)
-; AVX-NEXT:    store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP3]], ptr @c32, align 4
+; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; AVX-NEXT:    [[TMP6:%.*]] = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> [[TMP4]], <8 x i32> [[TMP5]], i32 3)
-; AVX-NEXT:    store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @umul_v16i32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @b32 to <16 x i32>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]], i32 3)
-; AVX512-NEXT:    store <16 x i32> [[TMP3]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4
+; AVX512-NEXT:    store <16 x i32> [[TMP3]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-  %b0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
-  %b1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
-  %b2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
-  %b3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
-  %b4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
-  %b5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
-  %b6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
-  %b7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
-  %b8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
-  %b9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
-  %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-  %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-  %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-  %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-  %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-  %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+  %b0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+  %b1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+  %b2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+  %b3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+  %b4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+  %b5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+  %b6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+  %b7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+  %b8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+  %b9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+  %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+  %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+  %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+  %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+  %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+  %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
   %r0  = call i32 @llvm.umul.fix.i32(i32 %a0 , i32 %b0 , i32 3)
   %r1  = call i32 @llvm.umul.fix.i32(i32 %a1 , i32 %b1 , i32 3)
   %r2  = call i32 @llvm.umul.fix.i32(i32 %a2 , i32 %b2 , i32 3)
@@ -1103,146 +1103,146 @@ define void @umul_v16i32() {
   %r13 = call i32 @llvm.umul.fix.i32(i32 %a13, i32 %b13, i32 3)
   %r14 = call i32 @llvm.umul.fix.i32(i32 %a14, i32 %b14, i32 3)
   %r15 = call i32 @llvm.umul.fix.i32(i32 %a15, i32 %b15, i32 3)
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @umul_v32i16() {
 ; SSE-LABEL: @umul_v32i16(
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
 ; SSE-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]], i32 3)
-; SSE-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
 ; SSE-NEXT:    [[TMP6:%.*]] = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]], i32 3)
-; SSE-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; SSE-NEXT:    [[TMP9:%.*]] = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i32 3)
-; SSE-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
 ; SSE-NEXT:    [[TMP12:%.*]] = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i32 3)
-; SSE-NEXT:    store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @umul_v32i16(
-; SLM-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SLM-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
 ; SLM-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]], i32 3)
-; SLM-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SLM-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SLM-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
 ; SLM-NEXT:    [[TMP6:%.*]] = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]], i32 3)
-; SLM-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SLM-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SLM-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; SLM-NEXT:    [[TMP9:%.*]] = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i32 3)
-; SLM-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SLM-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SLM-NEXT:    [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
 ; SLM-NEXT:    [[TMP12:%.*]] = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i32 3)
-; SLM-NEXT:    store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @umul_v32i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
+; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
 ; AVX-NEXT:    [[TMP3:%.*]] = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]], i32 3)
-; AVX-NEXT:    store <16 x i16> [[TMP3]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP3]], ptr @c16, align 2
+; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; AVX-NEXT:    [[TMP6:%.*]] = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> [[TMP4]], <16 x i16> [[TMP5]], i32 3)
-; AVX-NEXT:    store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @umul_v32i16(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @a16 to <32 x i16>*), align 2
-; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @b16 to <32 x i16>*), align 2
+; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
+; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]], i32 3)
-; AVX512-NEXT:    store <32 x i16> [[TMP3]], <32 x i16>* bitcast ([32 x i16]* @c16 to <32 x i16>*), align 2
+; AVX512-NEXT:    store <32 x i16> [[TMP3]], ptr @c16, align 2
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
-  %a1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
-  %a2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
-  %a3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
-  %a4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
-  %a5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
-  %a6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
-  %a7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
-  %a8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
-  %a9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
-  %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-  %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-  %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-  %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-  %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-  %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-  %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-  %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-  %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-  %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-  %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-  %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-  %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-  %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-  %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-  %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-  %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-  %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-  %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-  %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-  %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-  %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-  %b0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
-  %b1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
-  %b2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
-  %b3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
-  %b4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
-  %b5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
-  %b6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
-  %b7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
-  %b8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
-  %b9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
-  %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-  %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-  %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-  %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-  %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-  %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-  %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-  %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-  %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-  %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-  %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-  %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-  %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-  %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-  %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-  %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-  %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-  %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-  %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-  %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-  %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-  %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+  %a0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+  %a1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+  %a2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+  %a3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+  %a4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+  %a5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+  %a6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+  %a7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+  %a8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+  %a9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+  %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+  %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+  %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+  %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+  %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+  %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+  %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+  %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+  %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+  %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+  %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+  %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+  %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+  %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+  %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+  %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+  %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+  %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+  %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+  %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+  %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+  %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+  %b0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+  %b1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+  %b2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+  %b3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+  %b4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+  %b5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+  %b6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+  %b7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+  %b8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+  %b9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+  %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+  %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+  %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+  %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+  %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+  %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+  %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+  %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+  %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+  %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+  %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+  %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+  %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+  %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+  %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+  %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+  %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+  %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+  %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+  %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+  %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+  %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
   %r0  = call i16 @llvm.umul.fix.i16(i16 %a0 , i16 %b0 , i32 3)
   %r1  = call i16 @llvm.umul.fix.i16(i16 %a1 , i16 %b1 , i32 3)
   %r2  = call i16 @llvm.umul.fix.i16(i16 %a2 , i16 %b2 , i32 3)
@@ -1275,226 +1275,226 @@ define void @umul_v32i16() {
   %r29 = call i16 @llvm.umul.fix.i16(i16 %a29, i16 %b29, i32 3)
   %r30 = call i16 @llvm.umul.fix.i16(i16 %a30, i16 %b30, i32 3)
   %r31 = call i16 @llvm.umul.fix.i16(i16 %a31, i16 %b31, i32 3)
-  store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
-  store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
-  store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
-  store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
-  store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
-  store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
-  store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
-  store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
-  store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
-  store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
-  store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-  store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-  store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-  store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-  store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-  store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-  store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-  store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-  store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-  store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-  store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-  store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-  store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-  store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-  store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-  store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-  store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-  store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-  store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-  store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-  store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-  store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+  store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+  store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+  store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+  store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+  store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+  store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+  store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+  store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+  store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+  store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+  store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+  store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+  store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+  store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+  store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+  store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+  store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+  store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+  store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+  store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+  store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+  store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+  store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+  store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+  store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+  store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+  store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+  store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+  store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+  store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+  store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+  store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
   ret void
 }
 
 define void @umul_v64i8() {
 ; SSE-LABEL: @umul_v64i8(
-; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], i32 3)
-; SSE-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
 ; SSE-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 3)
-; SSE-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; SSE-NEXT:    [[TMP9:%.*]] = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]], i32 3)
-; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
 ; SSE-NEXT:    [[TMP12:%.*]] = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]], i32 3)
-; SSE-NEXT:    store <16 x i8> [[TMP9]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @umul_v64i8(
-; SLM-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
+; SLM-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], i32 3)
-; SLM-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SLM-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SLM-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
 ; SLM-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 3)
-; SLM-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SLM-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SLM-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; SLM-NEXT:    [[TMP9:%.*]] = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]], i32 3)
-; SLM-NEXT:    [[TMP10:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
+; SLM-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SLM-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
 ; SLM-NEXT:    [[TMP12:%.*]] = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]], i32 3)
-; SLM-NEXT:    store <16 x i8> [[TMP9]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SLM-NEXT:    store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @umul_v64i8(
-; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @a8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @b8 to <32 x i8>*), align 1
+; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> [[TMP1]], <32 x i8> [[TMP2]], i32 3)
-; AVX-NEXT:    store <32 x i8> [[TMP3]], <32 x i8>* bitcast ([64 x i8]* @c8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP3]], ptr @c8, align 1
+; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; AVX-NEXT:    [[TMP6:%.*]] = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> [[TMP4]], <32 x i8> [[TMP5]], i32 3)
-; AVX-NEXT:    store <32 x i8> [[TMP6]], <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @umul_v64i8(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @a8 to <64 x i8>*), align 1
-; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @b8 to <64 x i8>*), align 1
+; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
+; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> [[TMP1]], <64 x i8> [[TMP2]], i32 3)
-; AVX512-NEXT:    store <64 x i8> [[TMP3]], <64 x i8>* bitcast ([64 x i8]* @c8 to <64 x i8>*), align 1
+; AVX512-NEXT:    store <64 x i8> [[TMP3]], ptr @c8, align 1
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
-  %a1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
-  %a2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
-  %a3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
-  %a4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
-  %a5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
-  %a6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
-  %a7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
-  %a8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
-  %a9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
-  %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-  %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-  %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-  %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-  %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-  %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-  %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-  %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-  %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-  %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-  %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-  %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-  %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-  %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-  %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-  %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-  %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-  %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-  %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-  %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-  %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-  %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-  %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-  %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-  %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-  %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-  %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-  %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-  %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-  %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-  %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-  %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-  %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-  %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-  %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-  %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-  %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-  %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-  %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-  %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-  %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-  %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-  %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-  %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-  %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-  %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-  %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-  %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-  %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-  %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-  %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-  %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-  %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-  %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-  %b0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
-  %b1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
-  %b2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
-  %b3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
-  %b4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
-  %b5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
-  %b6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
-  %b7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
-  %b8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
-  %b9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
-  %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-  %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-  %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-  %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-  %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-  %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-  %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-  %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-  %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-  %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-  %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-  %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-  %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-  %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-  %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-  %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-  %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-  %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-  %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-  %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-  %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-  %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-  %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-  %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-  %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-  %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-  %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-  %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-  %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-  %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-  %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-  %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-  %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-  %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-  %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-  %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-  %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-  %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-  %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-  %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-  %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-  %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-  %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-  %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-  %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-  %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-  %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-  %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-  %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-  %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-  %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-  %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-  %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-  %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+  %a0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+  %a1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+  %a2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+  %a3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+  %a4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+  %a5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+  %a6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+  %a7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+  %a8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+  %a9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+  %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+  %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+  %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+  %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+  %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+  %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+  %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+  %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+  %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+  %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+  %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+  %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+  %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+  %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+  %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+  %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+  %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+  %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+  %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+  %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+  %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+  %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+  %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+  %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+  %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+  %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+  %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+  %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+  %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+  %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+  %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+  %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+  %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+  %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+  %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+  %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+  %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+  %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+  %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+  %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+  %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+  %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+  %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+  %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+  %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+  %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+  %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+  %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+  %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+  %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+  %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+  %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+  %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+  %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+  %b0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+  %b1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+  %b2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+  %b3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+  %b4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+  %b5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+  %b6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+  %b7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+  %b8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+  %b9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+  %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+  %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+  %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+  %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+  %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+  %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+  %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+  %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+  %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+  %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+  %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+  %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+  %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+  %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+  %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+  %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+  %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+  %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+  %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+  %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+  %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+  %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+  %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+  %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+  %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+  %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+  %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+  %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+  %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+  %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+  %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+  %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+  %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+  %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+  %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+  %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+  %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+  %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+  %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+  %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+  %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+  %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+  %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+  %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+  %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+  %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+  %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+  %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+  %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+  %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+  %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+  %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+  %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+  %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
   %r0  = call i8 @llvm.umul.fix.i8(i8 %a0 , i8 %b0 , i32 3)
   %r1  = call i8 @llvm.umul.fix.i8(i8 %a1 , i8 %b1 , i32 3)
   %r2  = call i8 @llvm.umul.fix.i8(i8 %a2 , i8 %b2 , i32 3)
@@ -1559,69 +1559,69 @@ define void @umul_v64i8() {
   %r61 = call i8 @llvm.umul.fix.i8(i8 %a61, i8 %b61, i32 3)
   %r62 = call i8 @llvm.umul.fix.i8(i8 %a62, i8 %b62, i32 3)
   %r63 = call i8 @llvm.umul.fix.i8(i8 %a63, i8 %b63, i32 3)
-  store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
-  store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
-  store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
-  store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
-  store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
-  store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
-  store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
-  store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
-  store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
-  store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
-  store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-  store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-  store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-  store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-  store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-  store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-  store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-  store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-  store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-  store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-  store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-  store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-  store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-  store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-  store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-  store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-  store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-  store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-  store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-  store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-  store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-  store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-  store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-  store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-  store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-  store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-  store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-  store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-  store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-  store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-  store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-  store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-  store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-  store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-  store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-  store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-  store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-  store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-  store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-  store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-  store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-  store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-  store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-  store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-  store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-  store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-  store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-  store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-  store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-  store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-  store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-  store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-  store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-  store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+  store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+  store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+  store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+  store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+  store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+  store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+  store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+  store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+  store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+  store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+  store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+  store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+  store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+  store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+  store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+  store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+  store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+  store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+  store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+  store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+  store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+  store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+  store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+  store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+  store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+  store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+  store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+  store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+  store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+  store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+  store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+  store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+  store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+  store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+  store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+  store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+  store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+  store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+  store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+  store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+  store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+  store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+  store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+  store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+  store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+  store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+  store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+  store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+  store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+  store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+  store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+  store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+  store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+  store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+  store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+  store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+  store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+  store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+  store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+  store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+  store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+  store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+  store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+  store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-smulo.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-smulo.ll
index 806deeeaa63fe..72a3ddd0bb747 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-smulo.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-smulo.ll
@@ -27,22 +27,22 @@ declare {i8 , i1} @llvm.smul.with.overflow.i8 (i8 , i8 )
 
 define void @mul_v8i64() {
 ; CHECK-LABEL: @mul_v8i64(
-; CHECK-NEXT:    [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-; CHECK-NEXT:    [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-; CHECK-NEXT:    [[A2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-; CHECK-NEXT:    [[A3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-; CHECK-NEXT:    [[A4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-; CHECK-NEXT:    [[A5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-; CHECK-NEXT:    [[A6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-; CHECK-NEXT:    [[A7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-; CHECK-NEXT:    [[B0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-; CHECK-NEXT:    [[B1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-; CHECK-NEXT:    [[B2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-; CHECK-NEXT:    [[B3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-; CHECK-NEXT:    [[B4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-; CHECK-NEXT:    [[B5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-; CHECK-NEXT:    [[B6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-; CHECK-NEXT:    [[B7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+; CHECK-NEXT:    [[A0:%.*]] = load i64, ptr @a64, align 8
+; CHECK-NEXT:    [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+; CHECK-NEXT:    [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; CHECK-NEXT:    [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+; CHECK-NEXT:    [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; CHECK-NEXT:    [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+; CHECK-NEXT:    [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; CHECK-NEXT:    [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+; CHECK-NEXT:    [[B0:%.*]] = load i64, ptr @b64, align 8
+; CHECK-NEXT:    [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+; CHECK-NEXT:    [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+; CHECK-NEXT:    [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+; CHECK-NEXT:    [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+; CHECK-NEXT:    [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+; CHECK-NEXT:    [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+; CHECK-NEXT:    [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
 ; CHECK-NEXT:    [[C0:%.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 [[A0]], i64 [[B0]])
 ; CHECK-NEXT:    [[C1:%.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 [[A1]], i64 [[B1]])
 ; CHECK-NEXT:    [[C2:%.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 [[A2]], i64 [[B2]])
@@ -59,32 +59,32 @@ define void @mul_v8i64() {
 ; CHECK-NEXT:    [[R5:%.*]] = extractvalue { i64, i1 } [[C5]], 0
 ; CHECK-NEXT:    [[R6:%.*]] = extractvalue { i64, i1 } [[C6]], 0
 ; CHECK-NEXT:    [[R7:%.*]] = extractvalue { i64, i1 } [[C7]], 0
-; CHECK-NEXT:    store i64 [[R0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-; CHECK-NEXT:    store i64 [[R1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-; CHECK-NEXT:    store i64 [[R2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-; CHECK-NEXT:    store i64 [[R3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-; CHECK-NEXT:    store i64 [[R4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-; CHECK-NEXT:    store i64 [[R5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-; CHECK-NEXT:    store i64 [[R6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-; CHECK-NEXT:    store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+; CHECK-NEXT:    store i64 [[R0]], ptr @c64, align 8
+; CHECK-NEXT:    store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+; CHECK-NEXT:    store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; CHECK-NEXT:    store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+; CHECK-NEXT:    store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; CHECK-NEXT:    store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+; CHECK-NEXT:    store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+; CHECK-NEXT:    store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-  %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-  %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-  %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-  %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-  %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-  %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-  %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-  %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-  %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-  %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-  %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-  %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-  %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-  %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-  %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+  %a0 = load i64, ptr @a64, align 8
+  %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+  %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+  %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+  %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+  %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+  %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+  %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+  %b0 = load i64, ptr @b64, align 8
+  %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+  %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+  %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+  %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+  %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+  %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+  %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
   %c0 = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %a0, i64 %b0)
   %c1 = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %a1, i64 %b1)
   %c2 = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %a2, i64 %b2)
@@ -101,51 +101,51 @@ define void @mul_v8i64() {
   %r5 = extractvalue {i64, i1} %c5, 0
   %r6 = extractvalue {i64, i1} %c6, 0
   %r7 = extractvalue {i64, i1} %c7, 0
-  store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-  store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-  store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-  store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-  store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-  store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-  store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-  store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+  store i64 %r0, ptr @c64, align 8
+  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @mul_v16i32() {
 ; CHECK-LABEL: @mul_v16i32(
-; CHECK-NEXT:    [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4
-; CHECK-NEXT:    [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4
-; CHECK-NEXT:    [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4
-; CHECK-NEXT:    [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4
-; CHECK-NEXT:    [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4
-; CHECK-NEXT:    [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4
-; CHECK-NEXT:    [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4
-; CHECK-NEXT:    [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4
-; CHECK-NEXT:    [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4
-; CHECK-NEXT:    [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4
-; CHECK-NEXT:    [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-; CHECK-NEXT:    [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-; CHECK-NEXT:    [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-; CHECK-NEXT:    [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-; CHECK-NEXT:    [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-; CHECK-NEXT:    [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-; CHECK-NEXT:    [[B0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0), align 4
-; CHECK-NEXT:    [[B1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1), align 4
-; CHECK-NEXT:    [[B2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2), align 4
-; CHECK-NEXT:    [[B3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3), align 4
-; CHECK-NEXT:    [[B4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4), align 4
-; CHECK-NEXT:    [[B5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5), align 4
-; CHECK-NEXT:    [[B6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6), align 4
-; CHECK-NEXT:    [[B7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7), align 4
-; CHECK-NEXT:    [[B8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8), align 4
-; CHECK-NEXT:    [[B9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9), align 4
-; CHECK-NEXT:    [[B10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-; CHECK-NEXT:    [[B11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-; CHECK-NEXT:    [[B12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-; CHECK-NEXT:    [[B13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-; CHECK-NEXT:    [[B14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-; CHECK-NEXT:    [[B15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+; CHECK-NEXT:    [[A0:%.*]] = load i32, ptr @a32, align 4
+; CHECK-NEXT:    [[A1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[A2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[A3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3), align 4
+; CHECK-NEXT:    [[A4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; CHECK-NEXT:    [[A5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5), align 4
+; CHECK-NEXT:    [[A6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6), align 4
+; CHECK-NEXT:    [[A7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7), align 4
+; CHECK-NEXT:    [[A8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; CHECK-NEXT:    [[A9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9), align 4
+; CHECK-NEXT:    [[A10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+; CHECK-NEXT:    [[A11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+; CHECK-NEXT:    [[A12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; CHECK-NEXT:    [[A13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+; CHECK-NEXT:    [[A14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+; CHECK-NEXT:    [[A15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+; CHECK-NEXT:    [[B0:%.*]] = load i32, ptr @b32, align 4
+; CHECK-NEXT:    [[B1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[B2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[B3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3), align 4
+; CHECK-NEXT:    [[B4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
+; CHECK-NEXT:    [[B5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5), align 4
+; CHECK-NEXT:    [[B6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6), align 4
+; CHECK-NEXT:    [[B7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7), align 4
+; CHECK-NEXT:    [[B8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
+; CHECK-NEXT:    [[B9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9), align 4
+; CHECK-NEXT:    [[B10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+; CHECK-NEXT:    [[B11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+; CHECK-NEXT:    [[B12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+; CHECK-NEXT:    [[B13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+; CHECK-NEXT:    [[B14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+; CHECK-NEXT:    [[B15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
 ; CHECK-NEXT:    [[C0:%.*]] = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[A0]], i32 [[B0]])
 ; CHECK-NEXT:    [[C1:%.*]] = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[A1]], i32 [[B1]])
 ; CHECK-NEXT:    [[C2:%.*]] = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[A2]], i32 [[B2]])
@@ -178,56 +178,56 @@ define void @mul_v16i32() {
 ; CHECK-NEXT:    [[R13:%.*]] = extractvalue { i32, i1 } [[C13]], 0
 ; CHECK-NEXT:    [[R14:%.*]] = extractvalue { i32, i1 } [[C14]], 0
 ; CHECK-NEXT:    [[R15:%.*]] = extractvalue { i32, i1 } [[C15]], 0
-; CHECK-NEXT:    store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4
-; CHECK-NEXT:    store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4
-; CHECK-NEXT:    store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4
-; CHECK-NEXT:    store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4
-; CHECK-NEXT:    store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4
-; CHECK-NEXT:    store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4
-; CHECK-NEXT:    store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4
-; CHECK-NEXT:    store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4
-; CHECK-NEXT:    store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4
-; CHECK-NEXT:    store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4
-; CHECK-NEXT:    store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-; CHECK-NEXT:    store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-; CHECK-NEXT:    store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-; CHECK-NEXT:    store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-; CHECK-NEXT:    store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-; CHECK-NEXT:    store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+; CHECK-NEXT:    store i32 [[R0]], ptr @c32, align 4
+; CHECK-NEXT:    store i32 [[R1]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1), align 4
+; CHECK-NEXT:    store i32 [[R2]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2), align 4
+; CHECK-NEXT:    store i32 [[R3]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3), align 4
+; CHECK-NEXT:    store i32 [[R4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; CHECK-NEXT:    store i32 [[R5]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5), align 4
+; CHECK-NEXT:    store i32 [[R6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6), align 4
+; CHECK-NEXT:    store i32 [[R7]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7), align 4
+; CHECK-NEXT:    store i32 [[R8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; CHECK-NEXT:    store i32 [[R9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9), align 4
+; CHECK-NEXT:    store i32 [[R10]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+; CHECK-NEXT:    store i32 [[R11]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+; CHECK-NEXT:    store i32 [[R12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+; CHECK-NEXT:    store i32 [[R13]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+; CHECK-NEXT:    store i32 [[R14]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+; CHECK-NEXT:    store i32 [[R15]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
 ; CHECK-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-  %b0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
-  %b1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
-  %b2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
-  %b3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
-  %b4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
-  %b5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
-  %b6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
-  %b7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
-  %b8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
-  %b9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
-  %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-  %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-  %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-  %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-  %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-  %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+  %b0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+  %b1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+  %b2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+  %b3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+  %b4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+  %b5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+  %b6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+  %b7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+  %b8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+  %b9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+  %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+  %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+  %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+  %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+  %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+  %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
   %c0  = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %a0 , i32 %b0 )
   %c1  = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %a1 , i32 %b1 )
   %c2  = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %a2 , i32 %b2 )
@@ -260,91 +260,91 @@ define void @mul_v16i32() {
   %r13 = extractvalue {i32, i1} %c13, 0
   %r14 = extractvalue {i32, i1} %c14, 0
   %r15 = extractvalue {i32, i1} %c15, 0
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @mul_v32i16() {
 ; CHECK-LABEL: @mul_v32i16(
-; CHECK-NEXT:    [[A0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0), align 2
-; CHECK-NEXT:    [[A1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1), align 2
-; CHECK-NEXT:    [[A2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2), align 2
-; CHECK-NEXT:    [[A3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3), align 2
-; CHECK-NEXT:    [[A4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4), align 2
-; CHECK-NEXT:    [[A5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5), align 2
-; CHECK-NEXT:    [[A6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6), align 2
-; CHECK-NEXT:    [[A7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7), align 2
-; CHECK-NEXT:    [[A8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8), align 2
-; CHECK-NEXT:    [[A9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9), align 2
-; CHECK-NEXT:    [[A10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-; CHECK-NEXT:    [[A11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-; CHECK-NEXT:    [[A12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-; CHECK-NEXT:    [[A13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-; CHECK-NEXT:    [[A14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-; CHECK-NEXT:    [[A15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-; CHECK-NEXT:    [[A16:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-; CHECK-NEXT:    [[A17:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-; CHECK-NEXT:    [[A18:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-; CHECK-NEXT:    [[A19:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-; CHECK-NEXT:    [[A20:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-; CHECK-NEXT:    [[A21:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-; CHECK-NEXT:    [[A22:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-; CHECK-NEXT:    [[A23:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-; CHECK-NEXT:    [[A24:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-; CHECK-NEXT:    [[A25:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-; CHECK-NEXT:    [[A26:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-; CHECK-NEXT:    [[A27:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-; CHECK-NEXT:    [[A28:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-; CHECK-NEXT:    [[A29:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-; CHECK-NEXT:    [[A30:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-; CHECK-NEXT:    [[A31:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-; CHECK-NEXT:    [[B0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0), align 2
-; CHECK-NEXT:    [[B1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1), align 2
-; CHECK-NEXT:    [[B2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2), align 2
-; CHECK-NEXT:    [[B3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3), align 2
-; CHECK-NEXT:    [[B4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4), align 2
-; CHECK-NEXT:    [[B5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5), align 2
-; CHECK-NEXT:    [[B6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6), align 2
-; CHECK-NEXT:    [[B7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7), align 2
-; CHECK-NEXT:    [[B8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8), align 2
-; CHECK-NEXT:    [[B9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9), align 2
-; CHECK-NEXT:    [[B10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-; CHECK-NEXT:    [[B11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-; CHECK-NEXT:    [[B12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-; CHECK-NEXT:    [[B13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-; CHECK-NEXT:    [[B14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-; CHECK-NEXT:    [[B15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-; CHECK-NEXT:    [[B16:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-; CHECK-NEXT:    [[B17:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-; CHECK-NEXT:    [[B18:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-; CHECK-NEXT:    [[B19:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-; CHECK-NEXT:    [[B20:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-; CHECK-NEXT:    [[B21:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-; CHECK-NEXT:    [[B22:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-; CHECK-NEXT:    [[B23:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-; CHECK-NEXT:    [[B24:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-; CHECK-NEXT:    [[B25:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-; CHECK-NEXT:    [[B26:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-; CHECK-NEXT:    [[B27:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-; CHECK-NEXT:    [[B28:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-; CHECK-NEXT:    [[B29:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-; CHECK-NEXT:    [[B30:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-; CHECK-NEXT:    [[B31:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+; CHECK-NEXT:    [[A0:%.*]] = load i16, ptr @a16, align 2
+; CHECK-NEXT:    [[A1:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1), align 2
+; CHECK-NEXT:    [[A2:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2), align 2
+; CHECK-NEXT:    [[A3:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3), align 2
+; CHECK-NEXT:    [[A4:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4), align 2
+; CHECK-NEXT:    [[A5:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5), align 2
+; CHECK-NEXT:    [[A6:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6), align 2
+; CHECK-NEXT:    [[A7:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7), align 2
+; CHECK-NEXT:    [[A8:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; CHECK-NEXT:    [[A9:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9), align 2
+; CHECK-NEXT:    [[A10:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+; CHECK-NEXT:    [[A11:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+; CHECK-NEXT:    [[A12:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+; CHECK-NEXT:    [[A13:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+; CHECK-NEXT:    [[A14:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+; CHECK-NEXT:    [[A15:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+; CHECK-NEXT:    [[A16:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; CHECK-NEXT:    [[A17:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+; CHECK-NEXT:    [[A18:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+; CHECK-NEXT:    [[A19:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+; CHECK-NEXT:    [[A20:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+; CHECK-NEXT:    [[A21:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+; CHECK-NEXT:    [[A22:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+; CHECK-NEXT:    [[A23:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+; CHECK-NEXT:    [[A24:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; CHECK-NEXT:    [[A25:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+; CHECK-NEXT:    [[A26:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+; CHECK-NEXT:    [[A27:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+; CHECK-NEXT:    [[A28:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+; CHECK-NEXT:    [[A29:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+; CHECK-NEXT:    [[A30:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+; CHECK-NEXT:    [[A31:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+; CHECK-NEXT:    [[B0:%.*]] = load i16, ptr @b16, align 2
+; CHECK-NEXT:    [[B1:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1), align 2
+; CHECK-NEXT:    [[B2:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2), align 2
+; CHECK-NEXT:    [[B3:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3), align 2
+; CHECK-NEXT:    [[B4:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4), align 2
+; CHECK-NEXT:    [[B5:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5), align 2
+; CHECK-NEXT:    [[B6:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6), align 2
+; CHECK-NEXT:    [[B7:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7), align 2
+; CHECK-NEXT:    [[B8:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
+; CHECK-NEXT:    [[B9:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9), align 2
+; CHECK-NEXT:    [[B10:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+; CHECK-NEXT:    [[B11:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+; CHECK-NEXT:    [[B12:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+; CHECK-NEXT:    [[B13:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+; CHECK-NEXT:    [[B14:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+; CHECK-NEXT:    [[B15:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+; CHECK-NEXT:    [[B16:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+; CHECK-NEXT:    [[B17:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+; CHECK-NEXT:    [[B18:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+; CHECK-NEXT:    [[B19:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+; CHECK-NEXT:    [[B20:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+; CHECK-NEXT:    [[B21:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+; CHECK-NEXT:    [[B22:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+; CHECK-NEXT:    [[B23:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+; CHECK-NEXT:    [[B24:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+; CHECK-NEXT:    [[B25:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+; CHECK-NEXT:    [[B26:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+; CHECK-NEXT:    [[B27:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+; CHECK-NEXT:    [[B28:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+; CHECK-NEXT:    [[B29:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+; CHECK-NEXT:    [[B30:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+; CHECK-NEXT:    [[B31:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
 ; CHECK-NEXT:    [[C0:%.*]] = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 [[A0]], i16 [[B0]])
 ; CHECK-NEXT:    [[C1:%.*]] = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 [[A1]], i16 [[B1]])
 ; CHECK-NEXT:    [[C2:%.*]] = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 [[A2]], i16 [[B2]])
@@ -409,104 +409,104 @@ define void @mul_v32i16() {
 ; CHECK-NEXT:    [[R29:%.*]] = extractvalue { i16, i1 } [[C29]], 0
 ; CHECK-NEXT:    [[R30:%.*]] = extractvalue { i16, i1 } [[C30]], 0
 ; CHECK-NEXT:    [[R31:%.*]] = extractvalue { i16, i1 } [[C31]], 0
-; CHECK-NEXT:    store i16 [[R0]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0), align 2
-; CHECK-NEXT:    store i16 [[R1]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1), align 2
-; CHECK-NEXT:    store i16 [[R2]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2), align 2
-; CHECK-NEXT:    store i16 [[R3]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3), align 2
-; CHECK-NEXT:    store i16 [[R4]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4), align 2
-; CHECK-NEXT:    store i16 [[R5]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5), align 2
-; CHECK-NEXT:    store i16 [[R6]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6), align 2
-; CHECK-NEXT:    store i16 [[R7]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7), align 2
-; CHECK-NEXT:    store i16 [[R8]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8), align 2
-; CHECK-NEXT:    store i16 [[R9]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9), align 2
-; CHECK-NEXT:    store i16 [[R10]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-; CHECK-NEXT:    store i16 [[R11]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-; CHECK-NEXT:    store i16 [[R12]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-; CHECK-NEXT:    store i16 [[R13]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-; CHECK-NEXT:    store i16 [[R14]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-; CHECK-NEXT:    store i16 [[R15]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-; CHECK-NEXT:    store i16 [[R16]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-; CHECK-NEXT:    store i16 [[R17]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-; CHECK-NEXT:    store i16 [[R18]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-; CHECK-NEXT:    store i16 [[R19]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-; CHECK-NEXT:    store i16 [[R20]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-; CHECK-NEXT:    store i16 [[R21]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-; CHECK-NEXT:    store i16 [[R22]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-; CHECK-NEXT:    store i16 [[R23]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-; CHECK-NEXT:    store i16 [[R24]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-; CHECK-NEXT:    store i16 [[R25]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-; CHECK-NEXT:    store i16 [[R26]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-; CHECK-NEXT:    store i16 [[R27]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-; CHECK-NEXT:    store i16 [[R28]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-; CHECK-NEXT:    store i16 [[R29]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-; CHECK-NEXT:    store i16 [[R30]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-; CHECK-NEXT:    store i16 [[R31]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+; CHECK-NEXT:    store i16 [[R0]], ptr @c16, align 2
+; CHECK-NEXT:    store i16 [[R1]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1), align 2
+; CHECK-NEXT:    store i16 [[R2]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2), align 2
+; CHECK-NEXT:    store i16 [[R3]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3), align 2
+; CHECK-NEXT:    store i16 [[R4]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4), align 2
+; CHECK-NEXT:    store i16 [[R5]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5), align 2
+; CHECK-NEXT:    store i16 [[R6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6), align 2
+; CHECK-NEXT:    store i16 [[R7]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7), align 2
+; CHECK-NEXT:    store i16 [[R8]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; CHECK-NEXT:    store i16 [[R9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9), align 2
+; CHECK-NEXT:    store i16 [[R10]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+; CHECK-NEXT:    store i16 [[R11]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+; CHECK-NEXT:    store i16 [[R12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+; CHECK-NEXT:    store i16 [[R13]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+; CHECK-NEXT:    store i16 [[R14]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+; CHECK-NEXT:    store i16 [[R15]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+; CHECK-NEXT:    store i16 [[R16]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; CHECK-NEXT:    store i16 [[R17]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+; CHECK-NEXT:    store i16 [[R18]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+; CHECK-NEXT:    store i16 [[R19]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+; CHECK-NEXT:    store i16 [[R20]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+; CHECK-NEXT:    store i16 [[R21]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+; CHECK-NEXT:    store i16 [[R22]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+; CHECK-NEXT:    store i16 [[R23]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+; CHECK-NEXT:    store i16 [[R24]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+; CHECK-NEXT:    store i16 [[R25]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+; CHECK-NEXT:    store i16 [[R26]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+; CHECK-NEXT:    store i16 [[R27]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+; CHECK-NEXT:    store i16 [[R28]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+; CHECK-NEXT:    store i16 [[R29]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+; CHECK-NEXT:    store i16 [[R30]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+; CHECK-NEXT:    store i16 [[R31]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
 ; CHECK-NEXT:    ret void
 ;
-  %a0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
-  %a1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
-  %a2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
-  %a3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
-  %a4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
-  %a5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
-  %a6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
-  %a7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
-  %a8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
-  %a9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
-  %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-  %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-  %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-  %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-  %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-  %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-  %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-  %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-  %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-  %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-  %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-  %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-  %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-  %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-  %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-  %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-  %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-  %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-  %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-  %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-  %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-  %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-  %b0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
-  %b1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
-  %b2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
-  %b3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
-  %b4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
-  %b5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
-  %b6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
-  %b7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
-  %b8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
-  %b9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
-  %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-  %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-  %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-  %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-  %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-  %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-  %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-  %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-  %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-  %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-  %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-  %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-  %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-  %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-  %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-  %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-  %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-  %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-  %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-  %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-  %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-  %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+  %a0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+  %a1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+  %a2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+  %a3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+  %a4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+  %a5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+  %a6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+  %a7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+  %a8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+  %a9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+  %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+  %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+  %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+  %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+  %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+  %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+  %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+  %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+  %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+  %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+  %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+  %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+  %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+  %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+  %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+  %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+  %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+  %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+  %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+  %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+  %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+  %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+  %b0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+  %b1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+  %b2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+  %b3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+  %b4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+  %b5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+  %b6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+  %b7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+  %b8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+  %b9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+  %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+  %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+  %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+  %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+  %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+  %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+  %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+  %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+  %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+  %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+  %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+  %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+  %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+  %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+  %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+  %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+  %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+  %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+  %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+  %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+  %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+  %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
   %c0  = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %a0 , i16 %b0 )
   %c1  = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %a1 , i16 %b1 )
   %c2  = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %a2 , i16 %b2 )
@@ -571,171 +571,171 @@ define void @mul_v32i16() {
   %r29 = extractvalue {i16, i1} %c29, 0
   %r30 = extractvalue {i16, i1} %c30, 0
   %r31 = extractvalue {i16, i1} %c31, 0
-  store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
-  store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
-  store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
-  store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
-  store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
-  store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
-  store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
-  store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
-  store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
-  store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
-  store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-  store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-  store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-  store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-  store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-  store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-  store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-  store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-  store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-  store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-  store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-  store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-  store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-  store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-  store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-  store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-  store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-  store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-  store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-  store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-  store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-  store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+  store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+  store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+  store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+  store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+  store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+  store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+  store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+  store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+  store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+  store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+  store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+  store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+  store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+  store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+  store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+  store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+  store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+  store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+  store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+  store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+  store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+  store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+  store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+  store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+  store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+  store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+  store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+  store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+  store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+  store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+  store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+  store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
   ret void
 }
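The rewrite applied throughout these hunks is the typed-pointer to
opaque-pointer conversion: pointee types disappear from load, store and
getelementptr operands, and a getelementptr whose indices are all zero
folds away into the bare global. A minimal before/after sketch, using
hypothetical globals @g and @g8:

  ; typed pointers (before)
  %x = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @g, i32 0, i64 1), align 2
  %y = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @g8, i32 0, i64 0), align 1

  ; opaque pointers (after)
  %x = load i16, ptr getelementptr inbounds ([32 x i16], ptr @g, i32 0, i64 1), align 2
  %y = load i8, ptr @g8, align 1    ; the all-zero GEP folds to the base pointer

This fold is why the index-0 CHECK lines in the next function, e.g. [[A0]]
and [[B0]], load directly from @a8 and @b8 rather than through a GEP.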
 
 define void @mul_v64i8() {
 ; CHECK-LABEL: @mul_v64i8(
-; CHECK-NEXT:    [[A0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0), align 1
-; CHECK-NEXT:    [[A1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1), align 1
-; CHECK-NEXT:    [[A2:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2), align 1
-; CHECK-NEXT:    [[A3:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3), align 1
-; CHECK-NEXT:    [[A4:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4), align 1
-; CHECK-NEXT:    [[A5:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5), align 1
-; CHECK-NEXT:    [[A6:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6), align 1
-; CHECK-NEXT:    [[A7:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7), align 1
-; CHECK-NEXT:    [[A8:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8), align 1
-; CHECK-NEXT:    [[A9:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9), align 1
-; CHECK-NEXT:    [[A10:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-; CHECK-NEXT:    [[A11:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-; CHECK-NEXT:    [[A12:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-; CHECK-NEXT:    [[A13:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-; CHECK-NEXT:    [[A14:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-; CHECK-NEXT:    [[A15:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-; CHECK-NEXT:    [[A16:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-; CHECK-NEXT:    [[A17:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-; CHECK-NEXT:    [[A18:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-; CHECK-NEXT:    [[A19:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-; CHECK-NEXT:    [[A20:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-; CHECK-NEXT:    [[A21:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-; CHECK-NEXT:    [[A22:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-; CHECK-NEXT:    [[A23:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-; CHECK-NEXT:    [[A24:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-; CHECK-NEXT:    [[A25:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-; CHECK-NEXT:    [[A26:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-; CHECK-NEXT:    [[A27:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-; CHECK-NEXT:    [[A28:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-; CHECK-NEXT:    [[A29:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-; CHECK-NEXT:    [[A30:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-; CHECK-NEXT:    [[A31:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-; CHECK-NEXT:    [[A32:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-; CHECK-NEXT:    [[A33:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-; CHECK-NEXT:    [[A34:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-; CHECK-NEXT:    [[A35:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-; CHECK-NEXT:    [[A36:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-; CHECK-NEXT:    [[A37:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-; CHECK-NEXT:    [[A38:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-; CHECK-NEXT:    [[A39:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-; CHECK-NEXT:    [[A40:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-; CHECK-NEXT:    [[A41:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-; CHECK-NEXT:    [[A42:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-; CHECK-NEXT:    [[A43:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-; CHECK-NEXT:    [[A44:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-; CHECK-NEXT:    [[A45:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-; CHECK-NEXT:    [[A46:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-; CHECK-NEXT:    [[A47:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-; CHECK-NEXT:    [[A48:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-; CHECK-NEXT:    [[A49:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-; CHECK-NEXT:    [[A50:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-; CHECK-NEXT:    [[A51:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-; CHECK-NEXT:    [[A52:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-; CHECK-NEXT:    [[A53:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-; CHECK-NEXT:    [[A54:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-; CHECK-NEXT:    [[A55:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-; CHECK-NEXT:    [[A56:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-; CHECK-NEXT:    [[A57:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-; CHECK-NEXT:    [[A58:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-; CHECK-NEXT:    [[A59:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-; CHECK-NEXT:    [[A60:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-; CHECK-NEXT:    [[A61:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-; CHECK-NEXT:    [[A62:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-; CHECK-NEXT:    [[A63:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-; CHECK-NEXT:    [[B0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0), align 1
-; CHECK-NEXT:    [[B1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1), align 1
-; CHECK-NEXT:    [[B2:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2), align 1
-; CHECK-NEXT:    [[B3:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3), align 1
-; CHECK-NEXT:    [[B4:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4), align 1
-; CHECK-NEXT:    [[B5:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5), align 1
-; CHECK-NEXT:    [[B6:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6), align 1
-; CHECK-NEXT:    [[B7:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7), align 1
-; CHECK-NEXT:    [[B8:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8), align 1
-; CHECK-NEXT:    [[B9:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9), align 1
-; CHECK-NEXT:    [[B10:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-; CHECK-NEXT:    [[B11:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-; CHECK-NEXT:    [[B12:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-; CHECK-NEXT:    [[B13:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-; CHECK-NEXT:    [[B14:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-; CHECK-NEXT:    [[B15:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-; CHECK-NEXT:    [[B16:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-; CHECK-NEXT:    [[B17:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-; CHECK-NEXT:    [[B18:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-; CHECK-NEXT:    [[B19:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-; CHECK-NEXT:    [[B20:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-; CHECK-NEXT:    [[B21:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-; CHECK-NEXT:    [[B22:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-; CHECK-NEXT:    [[B23:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-; CHECK-NEXT:    [[B24:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-; CHECK-NEXT:    [[B25:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-; CHECK-NEXT:    [[B26:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-; CHECK-NEXT:    [[B27:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-; CHECK-NEXT:    [[B28:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-; CHECK-NEXT:    [[B29:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-; CHECK-NEXT:    [[B30:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-; CHECK-NEXT:    [[B31:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-; CHECK-NEXT:    [[B32:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-; CHECK-NEXT:    [[B33:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-; CHECK-NEXT:    [[B34:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-; CHECK-NEXT:    [[B35:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-; CHECK-NEXT:    [[B36:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-; CHECK-NEXT:    [[B37:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-; CHECK-NEXT:    [[B38:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-; CHECK-NEXT:    [[B39:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-; CHECK-NEXT:    [[B40:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-; CHECK-NEXT:    [[B41:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-; CHECK-NEXT:    [[B42:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-; CHECK-NEXT:    [[B43:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-; CHECK-NEXT:    [[B44:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-; CHECK-NEXT:    [[B45:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-; CHECK-NEXT:    [[B46:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-; CHECK-NEXT:    [[B47:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-; CHECK-NEXT:    [[B48:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-; CHECK-NEXT:    [[B49:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-; CHECK-NEXT:    [[B50:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-; CHECK-NEXT:    [[B51:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-; CHECK-NEXT:    [[B52:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-; CHECK-NEXT:    [[B53:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-; CHECK-NEXT:    [[B54:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-; CHECK-NEXT:    [[B55:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-; CHECK-NEXT:    [[B56:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-; CHECK-NEXT:    [[B57:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-; CHECK-NEXT:    [[B58:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-; CHECK-NEXT:    [[B59:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-; CHECK-NEXT:    [[B60:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-; CHECK-NEXT:    [[B61:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-; CHECK-NEXT:    [[B62:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-; CHECK-NEXT:    [[B63:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+; CHECK-NEXT:    [[A0:%.*]] = load i8, ptr @a8, align 1
+; CHECK-NEXT:    [[A1:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1), align 1
+; CHECK-NEXT:    [[A2:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2), align 1
+; CHECK-NEXT:    [[A3:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3), align 1
+; CHECK-NEXT:    [[A4:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4), align 1
+; CHECK-NEXT:    [[A5:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5), align 1
+; CHECK-NEXT:    [[A6:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6), align 1
+; CHECK-NEXT:    [[A7:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7), align 1
+; CHECK-NEXT:    [[A8:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8), align 1
+; CHECK-NEXT:    [[A9:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9), align 1
+; CHECK-NEXT:    [[A10:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+; CHECK-NEXT:    [[A11:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+; CHECK-NEXT:    [[A12:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+; CHECK-NEXT:    [[A13:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+; CHECK-NEXT:    [[A14:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+; CHECK-NEXT:    [[A15:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+; CHECK-NEXT:    [[A16:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; CHECK-NEXT:    [[A17:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+; CHECK-NEXT:    [[A18:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+; CHECK-NEXT:    [[A19:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+; CHECK-NEXT:    [[A20:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+; CHECK-NEXT:    [[A21:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+; CHECK-NEXT:    [[A22:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+; CHECK-NEXT:    [[A23:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+; CHECK-NEXT:    [[A24:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+; CHECK-NEXT:    [[A25:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+; CHECK-NEXT:    [[A26:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+; CHECK-NEXT:    [[A27:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+; CHECK-NEXT:    [[A28:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+; CHECK-NEXT:    [[A29:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+; CHECK-NEXT:    [[A30:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+; CHECK-NEXT:    [[A31:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+; CHECK-NEXT:    [[A32:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; CHECK-NEXT:    [[A33:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+; CHECK-NEXT:    [[A34:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+; CHECK-NEXT:    [[A35:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+; CHECK-NEXT:    [[A36:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+; CHECK-NEXT:    [[A37:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+; CHECK-NEXT:    [[A38:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+; CHECK-NEXT:    [[A39:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+; CHECK-NEXT:    [[A40:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+; CHECK-NEXT:    [[A41:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+; CHECK-NEXT:    [[A42:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+; CHECK-NEXT:    [[A43:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+; CHECK-NEXT:    [[A44:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+; CHECK-NEXT:    [[A45:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+; CHECK-NEXT:    [[A46:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+; CHECK-NEXT:    [[A47:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+; CHECK-NEXT:    [[A48:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; CHECK-NEXT:    [[A49:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+; CHECK-NEXT:    [[A50:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+; CHECK-NEXT:    [[A51:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+; CHECK-NEXT:    [[A52:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+; CHECK-NEXT:    [[A53:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+; CHECK-NEXT:    [[A54:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+; CHECK-NEXT:    [[A55:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+; CHECK-NEXT:    [[A56:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+; CHECK-NEXT:    [[A57:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+; CHECK-NEXT:    [[A58:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+; CHECK-NEXT:    [[A59:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+; CHECK-NEXT:    [[A60:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+; CHECK-NEXT:    [[A61:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+; CHECK-NEXT:    [[A62:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+; CHECK-NEXT:    [[A63:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+; CHECK-NEXT:    [[B0:%.*]] = load i8, ptr @b8, align 1
+; CHECK-NEXT:    [[B1:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1), align 1
+; CHECK-NEXT:    [[B2:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2), align 1
+; CHECK-NEXT:    [[B3:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3), align 1
+; CHECK-NEXT:    [[B4:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4), align 1
+; CHECK-NEXT:    [[B5:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5), align 1
+; CHECK-NEXT:    [[B6:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6), align 1
+; CHECK-NEXT:    [[B7:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7), align 1
+; CHECK-NEXT:    [[B8:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8), align 1
+; CHECK-NEXT:    [[B9:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9), align 1
+; CHECK-NEXT:    [[B10:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+; CHECK-NEXT:    [[B11:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+; CHECK-NEXT:    [[B12:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+; CHECK-NEXT:    [[B13:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+; CHECK-NEXT:    [[B14:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+; CHECK-NEXT:    [[B15:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+; CHECK-NEXT:    [[B16:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+; CHECK-NEXT:    [[B17:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+; CHECK-NEXT:    [[B18:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+; CHECK-NEXT:    [[B19:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+; CHECK-NEXT:    [[B20:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+; CHECK-NEXT:    [[B21:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+; CHECK-NEXT:    [[B22:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+; CHECK-NEXT:    [[B23:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+; CHECK-NEXT:    [[B24:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+; CHECK-NEXT:    [[B25:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+; CHECK-NEXT:    [[B26:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+; CHECK-NEXT:    [[B27:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+; CHECK-NEXT:    [[B28:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+; CHECK-NEXT:    [[B29:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+; CHECK-NEXT:    [[B30:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+; CHECK-NEXT:    [[B31:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+; CHECK-NEXT:    [[B32:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+; CHECK-NEXT:    [[B33:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+; CHECK-NEXT:    [[B34:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+; CHECK-NEXT:    [[B35:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+; CHECK-NEXT:    [[B36:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+; CHECK-NEXT:    [[B37:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+; CHECK-NEXT:    [[B38:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+; CHECK-NEXT:    [[B39:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+; CHECK-NEXT:    [[B40:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+; CHECK-NEXT:    [[B41:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+; CHECK-NEXT:    [[B42:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+; CHECK-NEXT:    [[B43:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+; CHECK-NEXT:    [[B44:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+; CHECK-NEXT:    [[B45:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+; CHECK-NEXT:    [[B46:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+; CHECK-NEXT:    [[B47:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+; CHECK-NEXT:    [[B48:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+; CHECK-NEXT:    [[B49:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+; CHECK-NEXT:    [[B50:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+; CHECK-NEXT:    [[B51:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+; CHECK-NEXT:    [[B52:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+; CHECK-NEXT:    [[B53:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+; CHECK-NEXT:    [[B54:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+; CHECK-NEXT:    [[B55:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+; CHECK-NEXT:    [[B56:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+; CHECK-NEXT:    [[B57:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+; CHECK-NEXT:    [[B58:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+; CHECK-NEXT:    [[B59:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+; CHECK-NEXT:    [[B60:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+; CHECK-NEXT:    [[B61:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+; CHECK-NEXT:    [[B62:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+; CHECK-NEXT:    [[B63:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
 ; CHECK-NEXT:    [[C0:%.*]] = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[A0]], i8 [[B0]])
 ; CHECK-NEXT:    [[C1:%.*]] = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[A1]], i8 [[B1]])
 ; CHECK-NEXT:    [[C2:%.*]] = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[A2]], i8 [[B2]])
@@ -864,200 +864,200 @@ define void @mul_v64i8() {
 ; CHECK-NEXT:    [[R61:%.*]] = extractvalue { i8, i1 } [[C61]], 0
 ; CHECK-NEXT:    [[R62:%.*]] = extractvalue { i8, i1 } [[C62]], 0
 ; CHECK-NEXT:    [[R63:%.*]] = extractvalue { i8, i1 } [[C63]], 0
-; CHECK-NEXT:    store i8 [[R0]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0), align 1
-; CHECK-NEXT:    store i8 [[R1]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1), align 1
-; CHECK-NEXT:    store i8 [[R2]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2), align 1
-; CHECK-NEXT:    store i8 [[R3]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3), align 1
-; CHECK-NEXT:    store i8 [[R4]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4), align 1
-; CHECK-NEXT:    store i8 [[R5]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5), align 1
-; CHECK-NEXT:    store i8 [[R6]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6), align 1
-; CHECK-NEXT:    store i8 [[R7]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7), align 1
-; CHECK-NEXT:    store i8 [[R8]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8), align 1
-; CHECK-NEXT:    store i8 [[R9]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9), align 1
-; CHECK-NEXT:    store i8 [[R10]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-; CHECK-NEXT:    store i8 [[R11]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-; CHECK-NEXT:    store i8 [[R12]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-; CHECK-NEXT:    store i8 [[R13]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-; CHECK-NEXT:    store i8 [[R14]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-; CHECK-NEXT:    store i8 [[R15]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-; CHECK-NEXT:    store i8 [[R16]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-; CHECK-NEXT:    store i8 [[R17]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-; CHECK-NEXT:    store i8 [[R18]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-; CHECK-NEXT:    store i8 [[R19]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-; CHECK-NEXT:    store i8 [[R20]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-; CHECK-NEXT:    store i8 [[R21]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-; CHECK-NEXT:    store i8 [[R22]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-; CHECK-NEXT:    store i8 [[R23]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-; CHECK-NEXT:    store i8 [[R24]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-; CHECK-NEXT:    store i8 [[R25]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-; CHECK-NEXT:    store i8 [[R26]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-; CHECK-NEXT:    store i8 [[R27]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-; CHECK-NEXT:    store i8 [[R28]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-; CHECK-NEXT:    store i8 [[R29]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-; CHECK-NEXT:    store i8 [[R30]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-; CHECK-NEXT:    store i8 [[R31]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-; CHECK-NEXT:    store i8 [[R32]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-; CHECK-NEXT:    store i8 [[R33]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-; CHECK-NEXT:    store i8 [[R34]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-; CHECK-NEXT:    store i8 [[R35]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-; CHECK-NEXT:    store i8 [[R36]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-; CHECK-NEXT:    store i8 [[R37]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-; CHECK-NEXT:    store i8 [[R38]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-; CHECK-NEXT:    store i8 [[R39]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-; CHECK-NEXT:    store i8 [[R40]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-; CHECK-NEXT:    store i8 [[R41]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-; CHECK-NEXT:    store i8 [[R42]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-; CHECK-NEXT:    store i8 [[R43]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-; CHECK-NEXT:    store i8 [[R44]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-; CHECK-NEXT:    store i8 [[R45]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-; CHECK-NEXT:    store i8 [[R46]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-; CHECK-NEXT:    store i8 [[R47]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-; CHECK-NEXT:    store i8 [[R48]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-; CHECK-NEXT:    store i8 [[R49]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-; CHECK-NEXT:    store i8 [[R50]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-; CHECK-NEXT:    store i8 [[R51]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-; CHECK-NEXT:    store i8 [[R52]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-; CHECK-NEXT:    store i8 [[R53]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-; CHECK-NEXT:    store i8 [[R54]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-; CHECK-NEXT:    store i8 [[R55]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-; CHECK-NEXT:    store i8 [[R56]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-; CHECK-NEXT:    store i8 [[R57]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-; CHECK-NEXT:    store i8 [[R58]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-; CHECK-NEXT:    store i8 [[R59]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-; CHECK-NEXT:    store i8 [[R60]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-; CHECK-NEXT:    store i8 [[R61]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-; CHECK-NEXT:    store i8 [[R62]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-; CHECK-NEXT:    store i8 [[R63]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+; CHECK-NEXT:    store i8 [[R0]], ptr @c8, align 1
+; CHECK-NEXT:    store i8 [[R1]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1), align 1
+; CHECK-NEXT:    store i8 [[R2]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2), align 1
+; CHECK-NEXT:    store i8 [[R3]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3), align 1
+; CHECK-NEXT:    store i8 [[R4]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4), align 1
+; CHECK-NEXT:    store i8 [[R5]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5), align 1
+; CHECK-NEXT:    store i8 [[R6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6), align 1
+; CHECK-NEXT:    store i8 [[R7]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7), align 1
+; CHECK-NEXT:    store i8 [[R8]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8), align 1
+; CHECK-NEXT:    store i8 [[R9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9), align 1
+; CHECK-NEXT:    store i8 [[R10]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+; CHECK-NEXT:    store i8 [[R11]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+; CHECK-NEXT:    store i8 [[R12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+; CHECK-NEXT:    store i8 [[R13]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+; CHECK-NEXT:    store i8 [[R14]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+; CHECK-NEXT:    store i8 [[R15]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+; CHECK-NEXT:    store i8 [[R16]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; CHECK-NEXT:    store i8 [[R17]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+; CHECK-NEXT:    store i8 [[R18]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+; CHECK-NEXT:    store i8 [[R19]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+; CHECK-NEXT:    store i8 [[R20]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+; CHECK-NEXT:    store i8 [[R21]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+; CHECK-NEXT:    store i8 [[R22]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+; CHECK-NEXT:    store i8 [[R23]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+; CHECK-NEXT:    store i8 [[R24]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+; CHECK-NEXT:    store i8 [[R25]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+; CHECK-NEXT:    store i8 [[R26]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+; CHECK-NEXT:    store i8 [[R27]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+; CHECK-NEXT:    store i8 [[R28]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+; CHECK-NEXT:    store i8 [[R29]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+; CHECK-NEXT:    store i8 [[R30]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+; CHECK-NEXT:    store i8 [[R31]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+; CHECK-NEXT:    store i8 [[R32]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; CHECK-NEXT:    store i8 [[R33]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+; CHECK-NEXT:    store i8 [[R34]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+; CHECK-NEXT:    store i8 [[R35]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+; CHECK-NEXT:    store i8 [[R36]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+; CHECK-NEXT:    store i8 [[R37]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+; CHECK-NEXT:    store i8 [[R38]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+; CHECK-NEXT:    store i8 [[R39]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+; CHECK-NEXT:    store i8 [[R40]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+; CHECK-NEXT:    store i8 [[R41]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+; CHECK-NEXT:    store i8 [[R42]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+; CHECK-NEXT:    store i8 [[R43]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+; CHECK-NEXT:    store i8 [[R44]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+; CHECK-NEXT:    store i8 [[R45]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+; CHECK-NEXT:    store i8 [[R46]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+; CHECK-NEXT:    store i8 [[R47]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+; CHECK-NEXT:    store i8 [[R48]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+; CHECK-NEXT:    store i8 [[R49]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+; CHECK-NEXT:    store i8 [[R50]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+; CHECK-NEXT:    store i8 [[R51]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+; CHECK-NEXT:    store i8 [[R52]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+; CHECK-NEXT:    store i8 [[R53]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+; CHECK-NEXT:    store i8 [[R54]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+; CHECK-NEXT:    store i8 [[R55]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+; CHECK-NEXT:    store i8 [[R56]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+; CHECK-NEXT:    store i8 [[R57]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+; CHECK-NEXT:    store i8 [[R58]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+; CHECK-NEXT:    store i8 [[R59]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+; CHECK-NEXT:    store i8 [[R60]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+; CHECK-NEXT:    store i8 [[R61]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+; CHECK-NEXT:    store i8 [[R62]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+; CHECK-NEXT:    store i8 [[R63]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
 ; CHECK-NEXT:    ret void
 ;
-  %a0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
-  %a1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
-  %a2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
-  %a3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
-  %a4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
-  %a5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
-  %a6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
-  %a7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
-  %a8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
-  %a9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
-  %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-  %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-  %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-  %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-  %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-  %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-  %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-  %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-  %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-  %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-  %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-  %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-  %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-  %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-  %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-  %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-  %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-  %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-  %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-  %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-  %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-  %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-  %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-  %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-  %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-  %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-  %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-  %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-  %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-  %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-  %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-  %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-  %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-  %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-  %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-  %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-  %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-  %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-  %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-  %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-  %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-  %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-  %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-  %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-  %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-  %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-  %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-  %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-  %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-  %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-  %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-  %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-  %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-  %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-  %b0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
-  %b1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
-  %b2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
-  %b3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
-  %b4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
-  %b5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
-  %b6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
-  %b7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
-  %b8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
-  %b9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
-  %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-  %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-  %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-  %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-  %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-  %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-  %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-  %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-  %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-  %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-  %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-  %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-  %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-  %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-  %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-  %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-  %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-  %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-  %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-  %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-  %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-  %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-  %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-  %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-  %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-  %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-  %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-  %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-  %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-  %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-  %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-  %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-  %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-  %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-  %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-  %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-  %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-  %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-  %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-  %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-  %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-  %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-  %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-  %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-  %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-  %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-  %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-  %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-  %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-  %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-  %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-  %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-  %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-  %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+  %a0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+  %a1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+  %a2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+  %a3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+  %a4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+  %a5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+  %a6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+  %a7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+  %a8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+  %a9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+  %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+  %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+  %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+  %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+  %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+  %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+  %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+  %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+  %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+  %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+  %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+  %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+  %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+  %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+  %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+  %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+  %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+  %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+  %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+  %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+  %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+  %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+  %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+  %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+  %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+  %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+  %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+  %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+  %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+  %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+  %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+  %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+  %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+  %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+  %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+  %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+  %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+  %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+  %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+  %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+  %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+  %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+  %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+  %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+  %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+  %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+  %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+  %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+  %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+  %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+  %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+  %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+  %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+  %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+  %b0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+  %b1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+  %b2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+  %b3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+  %b4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+  %b5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+  %b6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+  %b7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+  %b8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+  %b9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+  %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+  %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+  %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+  %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+  %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+  %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+  %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+  %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+  %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+  %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+  %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+  %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+  %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+  %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+  %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+  %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+  %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+  %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+  %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+  %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+  %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+  %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+  %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+  %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+  %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+  %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+  %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+  %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+  %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+  %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+  %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+  %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+  %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+  %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+  %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+  %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+  %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+  %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+  %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+  %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+  %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+  %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+  %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+  %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+  %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+  %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+  %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+  %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+  %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+  %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+  %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+  %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+  %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+  %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
   %c0  = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %a0 , i8 %b0 )
   %c1  = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %a1 , i8 %b1 )
   %c2  = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %a2 , i8 %b2 )
@@ -1186,69 +1186,69 @@ define void @mul_v64i8() {
   %r61 = extractvalue {i8, i1} %c61, 0
   %r62 = extractvalue {i8, i1} %c62, 0
   %r63 = extractvalue {i8, i1} %c63, 0
-  store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
-  store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
-  store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
-  store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
-  store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
-  store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
-  store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
-  store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
-  store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
-  store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
-  store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-  store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-  store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-  store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-  store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-  store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-  store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-  store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-  store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-  store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-  store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-  store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-  store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-  store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-  store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-  store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-  store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-  store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-  store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-  store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-  store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-  store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-  store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-  store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-  store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-  store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-  store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-  store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-  store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-  store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-  store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-  store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-  store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-  store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-  store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-  store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-  store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-  store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-  store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-  store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-  store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-  store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-  store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-  store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-  store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-  store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-  store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-  store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-  store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-  store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-  store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-  store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-  store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-  store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+  store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+  store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+  store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+  store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+  store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+  store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+  store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+  store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+  store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+  store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+  store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+  store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+  store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+  store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+  store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+  store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+  store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+  store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+  store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+  store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+  store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+  store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+  store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+  store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+  store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+  store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+  store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+  store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+  store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+  store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+  store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+  store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+  store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+  store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+  store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+  store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+  store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+  store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+  store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+  store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+  store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+  store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+  store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+  store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+  store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+  store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+  store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+  store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+  store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+  store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+  store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+  store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+  store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+  store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+  store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+  store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+  store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+  store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+  store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+  store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+  store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+  store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+  store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+  store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
   ret void
 }

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-umulo.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-umulo.ll
index 1b1e48abd2b67..4126f06e8ca81 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-umulo.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-umulo.ll
@@ -27,22 +27,22 @@ declare {i8 , i1} @llvm.umul.with.overflow.i8 (i8 , i8 )
 
 define void @mul_v8i64() {
 ; CHECK-LABEL: @mul_v8i64(
-; CHECK-NEXT:    [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-; CHECK-NEXT:    [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-; CHECK-NEXT:    [[A2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-; CHECK-NEXT:    [[A3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-; CHECK-NEXT:    [[A4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-; CHECK-NEXT:    [[A5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-; CHECK-NEXT:    [[A6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-; CHECK-NEXT:    [[A7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-; CHECK-NEXT:    [[B0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-; CHECK-NEXT:    [[B1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-; CHECK-NEXT:    [[B2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-; CHECK-NEXT:    [[B3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-; CHECK-NEXT:    [[B4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-; CHECK-NEXT:    [[B5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-; CHECK-NEXT:    [[B6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-; CHECK-NEXT:    [[B7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+; CHECK-NEXT:    [[A0:%.*]] = load i64, ptr @a64, align 8
+; CHECK-NEXT:    [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+; CHECK-NEXT:    [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; CHECK-NEXT:    [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+; CHECK-NEXT:    [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; CHECK-NEXT:    [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+; CHECK-NEXT:    [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; CHECK-NEXT:    [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+; CHECK-NEXT:    [[B0:%.*]] = load i64, ptr @b64, align 8
+; CHECK-NEXT:    [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+; CHECK-NEXT:    [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+; CHECK-NEXT:    [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+; CHECK-NEXT:    [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+; CHECK-NEXT:    [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+; CHECK-NEXT:    [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+; CHECK-NEXT:    [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
 ; CHECK-NEXT:    [[C0:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[A0]], i64 [[B0]])
 ; CHECK-NEXT:    [[C1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[A1]], i64 [[B1]])
 ; CHECK-NEXT:    [[C2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[A2]], i64 [[B2]])
@@ -59,32 +59,32 @@ define void @mul_v8i64() {
 ; CHECK-NEXT:    [[R5:%.*]] = extractvalue { i64, i1 } [[C5]], 0
 ; CHECK-NEXT:    [[R6:%.*]] = extractvalue { i64, i1 } [[C6]], 0
 ; CHECK-NEXT:    [[R7:%.*]] = extractvalue { i64, i1 } [[C7]], 0
-; CHECK-NEXT:    store i64 [[R0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-; CHECK-NEXT:    store i64 [[R1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-; CHECK-NEXT:    store i64 [[R2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-; CHECK-NEXT:    store i64 [[R3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-; CHECK-NEXT:    store i64 [[R4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-; CHECK-NEXT:    store i64 [[R5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-; CHECK-NEXT:    store i64 [[R6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-; CHECK-NEXT:    store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+; CHECK-NEXT:    store i64 [[R0]], ptr @c64, align 8
+; CHECK-NEXT:    store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+; CHECK-NEXT:    store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; CHECK-NEXT:    store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+; CHECK-NEXT:    store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; CHECK-NEXT:    store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+; CHECK-NEXT:    store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+; CHECK-NEXT:    store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-  %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-  %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-  %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-  %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-  %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-  %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-  %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-  %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-  %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-  %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-  %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-  %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-  %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-  %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-  %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+  %a0 = load i64, ptr @a64, align 8
+  %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+  %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+  %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+  %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+  %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+  %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+  %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+  %b0 = load i64, ptr @b64, align 8
+  %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+  %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+  %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+  %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+  %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+  %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+  %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
   %c0 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %a0, i64 %b0)
   %c1 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %a1, i64 %b1)
   %c2 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %a2, i64 %b2)
@@ -101,51 +101,51 @@ define void @mul_v8i64() {
   %r5 = extractvalue {i64, i1} %c5, 0
   %r6 = extractvalue {i64, i1} %c6, 0
   %r7 = extractvalue {i64, i1} %c7, 0
-  store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-  store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-  store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-  store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-  store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-  store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-  store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-  store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+  store i64 %r0, ptr @c64, align 8
+  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @mul_v16i32() {
 ; CHECK-LABEL: @mul_v16i32(
-; CHECK-NEXT:    [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4
-; CHECK-NEXT:    [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4
-; CHECK-NEXT:    [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4
-; CHECK-NEXT:    [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4
-; CHECK-NEXT:    [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4
-; CHECK-NEXT:    [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4
-; CHECK-NEXT:    [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4
-; CHECK-NEXT:    [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4
-; CHECK-NEXT:    [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4
-; CHECK-NEXT:    [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4
-; CHECK-NEXT:    [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-; CHECK-NEXT:    [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-; CHECK-NEXT:    [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-; CHECK-NEXT:    [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-; CHECK-NEXT:    [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-; CHECK-NEXT:    [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-; CHECK-NEXT:    [[B0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0), align 4
-; CHECK-NEXT:    [[B1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1), align 4
-; CHECK-NEXT:    [[B2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2), align 4
-; CHECK-NEXT:    [[B3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3), align 4
-; CHECK-NEXT:    [[B4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4), align 4
-; CHECK-NEXT:    [[B5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5), align 4
-; CHECK-NEXT:    [[B6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6), align 4
-; CHECK-NEXT:    [[B7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7), align 4
-; CHECK-NEXT:    [[B8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8), align 4
-; CHECK-NEXT:    [[B9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9), align 4
-; CHECK-NEXT:    [[B10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-; CHECK-NEXT:    [[B11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-; CHECK-NEXT:    [[B12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-; CHECK-NEXT:    [[B13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-; CHECK-NEXT:    [[B14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-; CHECK-NEXT:    [[B15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+; CHECK-NEXT:    [[A0:%.*]] = load i32, ptr @a32, align 4
+; CHECK-NEXT:    [[A1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[A2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[A3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3), align 4
+; CHECK-NEXT:    [[A4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; CHECK-NEXT:    [[A5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5), align 4
+; CHECK-NEXT:    [[A6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6), align 4
+; CHECK-NEXT:    [[A7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7), align 4
+; CHECK-NEXT:    [[A8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; CHECK-NEXT:    [[A9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9), align 4
+; CHECK-NEXT:    [[A10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+; CHECK-NEXT:    [[A11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+; CHECK-NEXT:    [[A12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; CHECK-NEXT:    [[A13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+; CHECK-NEXT:    [[A14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+; CHECK-NEXT:    [[A15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+; CHECK-NEXT:    [[B0:%.*]] = load i32, ptr @b32, align 4
+; CHECK-NEXT:    [[B1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[B2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[B3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3), align 4
+; CHECK-NEXT:    [[B4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
+; CHECK-NEXT:    [[B5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5), align 4
+; CHECK-NEXT:    [[B6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6), align 4
+; CHECK-NEXT:    [[B7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7), align 4
+; CHECK-NEXT:    [[B8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
+; CHECK-NEXT:    [[B9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9), align 4
+; CHECK-NEXT:    [[B10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+; CHECK-NEXT:    [[B11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+; CHECK-NEXT:    [[B12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+; CHECK-NEXT:    [[B13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+; CHECK-NEXT:    [[B14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+; CHECK-NEXT:    [[B15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
 ; CHECK-NEXT:    [[C0:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[A0]], i32 [[B0]])
 ; CHECK-NEXT:    [[C1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[A1]], i32 [[B1]])
 ; CHECK-NEXT:    [[C2:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[A2]], i32 [[B2]])
@@ -178,56 +178,56 @@ define void @mul_v16i32() {
 ; CHECK-NEXT:    [[R13:%.*]] = extractvalue { i32, i1 } [[C13]], 0
 ; CHECK-NEXT:    [[R14:%.*]] = extractvalue { i32, i1 } [[C14]], 0
 ; CHECK-NEXT:    [[R15:%.*]] = extractvalue { i32, i1 } [[C15]], 0
-; CHECK-NEXT:    store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4
-; CHECK-NEXT:    store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4
-; CHECK-NEXT:    store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4
-; CHECK-NEXT:    store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4
-; CHECK-NEXT:    store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4
-; CHECK-NEXT:    store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4
-; CHECK-NEXT:    store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4
-; CHECK-NEXT:    store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4
-; CHECK-NEXT:    store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4
-; CHECK-NEXT:    store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4
-; CHECK-NEXT:    store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-; CHECK-NEXT:    store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-; CHECK-NEXT:    store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-; CHECK-NEXT:    store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-; CHECK-NEXT:    store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-; CHECK-NEXT:    store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+; CHECK-NEXT:    store i32 [[R0]], ptr @c32, align 4
+; CHECK-NEXT:    store i32 [[R1]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1), align 4
+; CHECK-NEXT:    store i32 [[R2]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2), align 4
+; CHECK-NEXT:    store i32 [[R3]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3), align 4
+; CHECK-NEXT:    store i32 [[R4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; CHECK-NEXT:    store i32 [[R5]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5), align 4
+; CHECK-NEXT:    store i32 [[R6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6), align 4
+; CHECK-NEXT:    store i32 [[R7]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7), align 4
+; CHECK-NEXT:    store i32 [[R8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; CHECK-NEXT:    store i32 [[R9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9), align 4
+; CHECK-NEXT:    store i32 [[R10]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+; CHECK-NEXT:    store i32 [[R11]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+; CHECK-NEXT:    store i32 [[R12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+; CHECK-NEXT:    store i32 [[R13]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+; CHECK-NEXT:    store i32 [[R14]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+; CHECK-NEXT:    store i32 [[R15]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
 ; CHECK-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-  %b0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
-  %b1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
-  %b2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
-  %b3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
-  %b4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
-  %b5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
-  %b6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
-  %b7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
-  %b8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
-  %b9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
-  %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-  %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-  %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-  %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-  %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-  %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+  %b0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+  %b1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+  %b2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+  %b3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+  %b4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+  %b5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+  %b6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+  %b7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+  %b8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+  %b9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+  %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+  %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+  %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+  %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+  %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+  %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
   %c0  = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %a0 , i32 %b0 )
   %c1  = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %a1 , i32 %b1 )
   %c2  = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %a2 , i32 %b2 )
@@ -260,91 +260,91 @@ define void @mul_v16i32() {
   %r13 = extractvalue {i32, i1} %c13, 0
   %r14 = extractvalue {i32, i1} %c14, 0
   %r15 = extractvalue {i32, i1} %c15, 0
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
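
The hunks above and below apply the same mechanical rewrite throughout: under LLVM's opaque-pointer model, every typed pointer such as i32* or [16 x i32]* becomes the single ptr type, and constant getelementptr expressions keep the pointee type only as the leading source-element-type argument. A side effect visible in the regenerated CHECK lines is that an all-zero-index constant GEP folds away entirely, so element 0 is checked as a plain load from the global (e.g. "load i16, ptr @a16" in the next function). A minimal before/after sketch of the conversion, using a hypothetical global @g that is not part of this commit:

  ; typed pointers (before)
  %v = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @g, i32 0, i64 1), align 4

  ; opaque pointers (after)
  %v = load i32, ptr getelementptr inbounds ([4 x i32], ptr @g, i32 0, i64 1), align 4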
 
 define void @mul_v32i16() {
 ; CHECK-LABEL: @mul_v32i16(
-; CHECK-NEXT:    [[A0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0), align 2
-; CHECK-NEXT:    [[A1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1), align 2
-; CHECK-NEXT:    [[A2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2), align 2
-; CHECK-NEXT:    [[A3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3), align 2
-; CHECK-NEXT:    [[A4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4), align 2
-; CHECK-NEXT:    [[A5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5), align 2
-; CHECK-NEXT:    [[A6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6), align 2
-; CHECK-NEXT:    [[A7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7), align 2
-; CHECK-NEXT:    [[A8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8), align 2
-; CHECK-NEXT:    [[A9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9), align 2
-; CHECK-NEXT:    [[A10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-; CHECK-NEXT:    [[A11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-; CHECK-NEXT:    [[A12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-; CHECK-NEXT:    [[A13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-; CHECK-NEXT:    [[A14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-; CHECK-NEXT:    [[A15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-; CHECK-NEXT:    [[A16:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-; CHECK-NEXT:    [[A17:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-; CHECK-NEXT:    [[A18:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-; CHECK-NEXT:    [[A19:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-; CHECK-NEXT:    [[A20:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-; CHECK-NEXT:    [[A21:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-; CHECK-NEXT:    [[A22:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-; CHECK-NEXT:    [[A23:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-; CHECK-NEXT:    [[A24:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-; CHECK-NEXT:    [[A25:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-; CHECK-NEXT:    [[A26:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-; CHECK-NEXT:    [[A27:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-; CHECK-NEXT:    [[A28:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-; CHECK-NEXT:    [[A29:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-; CHECK-NEXT:    [[A30:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-; CHECK-NEXT:    [[A31:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-; CHECK-NEXT:    [[B0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0), align 2
-; CHECK-NEXT:    [[B1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1), align 2
-; CHECK-NEXT:    [[B2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2), align 2
-; CHECK-NEXT:    [[B3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3), align 2
-; CHECK-NEXT:    [[B4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4), align 2
-; CHECK-NEXT:    [[B5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5), align 2
-; CHECK-NEXT:    [[B6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6), align 2
-; CHECK-NEXT:    [[B7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7), align 2
-; CHECK-NEXT:    [[B8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8), align 2
-; CHECK-NEXT:    [[B9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9), align 2
-; CHECK-NEXT:    [[B10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-; CHECK-NEXT:    [[B11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-; CHECK-NEXT:    [[B12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-; CHECK-NEXT:    [[B13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-; CHECK-NEXT:    [[B14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-; CHECK-NEXT:    [[B15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-; CHECK-NEXT:    [[B16:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-; CHECK-NEXT:    [[B17:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-; CHECK-NEXT:    [[B18:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-; CHECK-NEXT:    [[B19:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-; CHECK-NEXT:    [[B20:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-; CHECK-NEXT:    [[B21:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-; CHECK-NEXT:    [[B22:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-; CHECK-NEXT:    [[B23:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-; CHECK-NEXT:    [[B24:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-; CHECK-NEXT:    [[B25:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-; CHECK-NEXT:    [[B26:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-; CHECK-NEXT:    [[B27:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-; CHECK-NEXT:    [[B28:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-; CHECK-NEXT:    [[B29:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-; CHECK-NEXT:    [[B30:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-; CHECK-NEXT:    [[B31:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+; CHECK-NEXT:    [[A0:%.*]] = load i16, ptr @a16, align 2
+; CHECK-NEXT:    [[A1:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1), align 2
+; CHECK-NEXT:    [[A2:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2), align 2
+; CHECK-NEXT:    [[A3:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3), align 2
+; CHECK-NEXT:    [[A4:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4), align 2
+; CHECK-NEXT:    [[A5:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5), align 2
+; CHECK-NEXT:    [[A6:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6), align 2
+; CHECK-NEXT:    [[A7:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7), align 2
+; CHECK-NEXT:    [[A8:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; CHECK-NEXT:    [[A9:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9), align 2
+; CHECK-NEXT:    [[A10:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+; CHECK-NEXT:    [[A11:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+; CHECK-NEXT:    [[A12:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+; CHECK-NEXT:    [[A13:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+; CHECK-NEXT:    [[A14:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+; CHECK-NEXT:    [[A15:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+; CHECK-NEXT:    [[A16:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; CHECK-NEXT:    [[A17:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+; CHECK-NEXT:    [[A18:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+; CHECK-NEXT:    [[A19:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+; CHECK-NEXT:    [[A20:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+; CHECK-NEXT:    [[A21:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+; CHECK-NEXT:    [[A22:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+; CHECK-NEXT:    [[A23:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+; CHECK-NEXT:    [[A24:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; CHECK-NEXT:    [[A25:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+; CHECK-NEXT:    [[A26:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+; CHECK-NEXT:    [[A27:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+; CHECK-NEXT:    [[A28:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+; CHECK-NEXT:    [[A29:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+; CHECK-NEXT:    [[A30:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+; CHECK-NEXT:    [[A31:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+; CHECK-NEXT:    [[B0:%.*]] = load i16, ptr @b16, align 2
+; CHECK-NEXT:    [[B1:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1), align 2
+; CHECK-NEXT:    [[B2:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2), align 2
+; CHECK-NEXT:    [[B3:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3), align 2
+; CHECK-NEXT:    [[B4:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4), align 2
+; CHECK-NEXT:    [[B5:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5), align 2
+; CHECK-NEXT:    [[B6:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6), align 2
+; CHECK-NEXT:    [[B7:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7), align 2
+; CHECK-NEXT:    [[B8:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
+; CHECK-NEXT:    [[B9:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9), align 2
+; CHECK-NEXT:    [[B10:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+; CHECK-NEXT:    [[B11:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+; CHECK-NEXT:    [[B12:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+; CHECK-NEXT:    [[B13:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+; CHECK-NEXT:    [[B14:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+; CHECK-NEXT:    [[B15:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+; CHECK-NEXT:    [[B16:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+; CHECK-NEXT:    [[B17:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+; CHECK-NEXT:    [[B18:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+; CHECK-NEXT:    [[B19:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+; CHECK-NEXT:    [[B20:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+; CHECK-NEXT:    [[B21:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+; CHECK-NEXT:    [[B22:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+; CHECK-NEXT:    [[B23:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+; CHECK-NEXT:    [[B24:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+; CHECK-NEXT:    [[B25:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+; CHECK-NEXT:    [[B26:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+; CHECK-NEXT:    [[B27:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+; CHECK-NEXT:    [[B28:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+; CHECK-NEXT:    [[B29:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+; CHECK-NEXT:    [[B30:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+; CHECK-NEXT:    [[B31:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
 ; CHECK-NEXT:    [[C0:%.*]] = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 [[A0]], i16 [[B0]])
 ; CHECK-NEXT:    [[C1:%.*]] = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 [[A1]], i16 [[B1]])
 ; CHECK-NEXT:    [[C2:%.*]] = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 [[A2]], i16 [[B2]])
@@ -409,104 +409,104 @@ define void @mul_v32i16() {
 ; CHECK-NEXT:    [[R29:%.*]] = extractvalue { i16, i1 } [[C29]], 0
 ; CHECK-NEXT:    [[R30:%.*]] = extractvalue { i16, i1 } [[C30]], 0
 ; CHECK-NEXT:    [[R31:%.*]] = extractvalue { i16, i1 } [[C31]], 0
-; CHECK-NEXT:    store i16 [[R0]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0), align 2
-; CHECK-NEXT:    store i16 [[R1]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1), align 2
-; CHECK-NEXT:    store i16 [[R2]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2), align 2
-; CHECK-NEXT:    store i16 [[R3]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3), align 2
-; CHECK-NEXT:    store i16 [[R4]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4), align 2
-; CHECK-NEXT:    store i16 [[R5]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5), align 2
-; CHECK-NEXT:    store i16 [[R6]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6), align 2
-; CHECK-NEXT:    store i16 [[R7]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7), align 2
-; CHECK-NEXT:    store i16 [[R8]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8), align 2
-; CHECK-NEXT:    store i16 [[R9]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9), align 2
-; CHECK-NEXT:    store i16 [[R10]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-; CHECK-NEXT:    store i16 [[R11]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-; CHECK-NEXT:    store i16 [[R12]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-; CHECK-NEXT:    store i16 [[R13]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-; CHECK-NEXT:    store i16 [[R14]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-; CHECK-NEXT:    store i16 [[R15]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-; CHECK-NEXT:    store i16 [[R16]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-; CHECK-NEXT:    store i16 [[R17]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-; CHECK-NEXT:    store i16 [[R18]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-; CHECK-NEXT:    store i16 [[R19]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-; CHECK-NEXT:    store i16 [[R20]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-; CHECK-NEXT:    store i16 [[R21]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-; CHECK-NEXT:    store i16 [[R22]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-; CHECK-NEXT:    store i16 [[R23]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-; CHECK-NEXT:    store i16 [[R24]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-; CHECK-NEXT:    store i16 [[R25]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-; CHECK-NEXT:    store i16 [[R26]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-; CHECK-NEXT:    store i16 [[R27]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-; CHECK-NEXT:    store i16 [[R28]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-; CHECK-NEXT:    store i16 [[R29]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-; CHECK-NEXT:    store i16 [[R30]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-; CHECK-NEXT:    store i16 [[R31]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+; CHECK-NEXT:    store i16 [[R0]], ptr @c16, align 2
+; CHECK-NEXT:    store i16 [[R1]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1), align 2
+; CHECK-NEXT:    store i16 [[R2]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2), align 2
+; CHECK-NEXT:    store i16 [[R3]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3), align 2
+; CHECK-NEXT:    store i16 [[R4]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4), align 2
+; CHECK-NEXT:    store i16 [[R5]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5), align 2
+; CHECK-NEXT:    store i16 [[R6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6), align 2
+; CHECK-NEXT:    store i16 [[R7]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7), align 2
+; CHECK-NEXT:    store i16 [[R8]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; CHECK-NEXT:    store i16 [[R9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9), align 2
+; CHECK-NEXT:    store i16 [[R10]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+; CHECK-NEXT:    store i16 [[R11]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+; CHECK-NEXT:    store i16 [[R12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+; CHECK-NEXT:    store i16 [[R13]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+; CHECK-NEXT:    store i16 [[R14]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+; CHECK-NEXT:    store i16 [[R15]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+; CHECK-NEXT:    store i16 [[R16]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; CHECK-NEXT:    store i16 [[R17]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+; CHECK-NEXT:    store i16 [[R18]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+; CHECK-NEXT:    store i16 [[R19]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+; CHECK-NEXT:    store i16 [[R20]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+; CHECK-NEXT:    store i16 [[R21]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+; CHECK-NEXT:    store i16 [[R22]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+; CHECK-NEXT:    store i16 [[R23]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+; CHECK-NEXT:    store i16 [[R24]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+; CHECK-NEXT:    store i16 [[R25]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+; CHECK-NEXT:    store i16 [[R26]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+; CHECK-NEXT:    store i16 [[R27]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+; CHECK-NEXT:    store i16 [[R28]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+; CHECK-NEXT:    store i16 [[R29]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+; CHECK-NEXT:    store i16 [[R30]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+; CHECK-NEXT:    store i16 [[R31]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
 ; CHECK-NEXT:    ret void
 ;
-  %a0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
-  %a1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
-  %a2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
-  %a3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
-  %a4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
-  %a5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
-  %a6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
-  %a7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
-  %a8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
-  %a9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
-  %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-  %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-  %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-  %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-  %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-  %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-  %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-  %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-  %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-  %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-  %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-  %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-  %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-  %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-  %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-  %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-  %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-  %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-  %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-  %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-  %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-  %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-  %b0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
-  %b1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
-  %b2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
-  %b3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
-  %b4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
-  %b5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
-  %b6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
-  %b7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
-  %b8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
-  %b9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
-  %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-  %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-  %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-  %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-  %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-  %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-  %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-  %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-  %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-  %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-  %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-  %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-  %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-  %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-  %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-  %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-  %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-  %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-  %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-  %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-  %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-  %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+  %a0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+  %a1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+  %a2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+  %a3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+  %a4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+  %a5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+  %a6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+  %a7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+  %a8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+  %a9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+  %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+  %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+  %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+  %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+  %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+  %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+  %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+  %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+  %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+  %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+  %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+  %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+  %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+  %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+  %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+  %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+  %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+  %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+  %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+  %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+  %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+  %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+  %b0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+  %b1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+  %b2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+  %b3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+  %b4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+  %b5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+  %b6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+  %b7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+  %b8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+  %b9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+  %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+  %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+  %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+  %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+  %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+  %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+  %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+  %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+  %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+  %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+  %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+  %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+  %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+  %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+  %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+  %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+  %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+  %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+  %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+  %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+  %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+  %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
   %c0  = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %a0 , i16 %b0 )
   %c1  = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %a1 , i16 %b1 )
   %c2  = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %a2 , i16 %b2 )
@@ -571,171 +571,171 @@ define void @mul_v32i16() {
   %r29 = extractvalue {i16, i1} %c29, 0
   %r30 = extractvalue {i16, i1} %c30, 0
   %r31 = extractvalue {i16, i1} %c31, 0
-  store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
-  store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
-  store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
-  store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
-  store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
-  store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
-  store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
-  store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
-  store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
-  store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
-  store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-  store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-  store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-  store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-  store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-  store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-  store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-  store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-  store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-  store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-  store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-  store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-  store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-  store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-  store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-  store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-  store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-  store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-  store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-  store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-  store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-  store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+  store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+  store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+  store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+  store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+  store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+  store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+  store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+  store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+  store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+  store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+  store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+  store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+  store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+  store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+  store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+  store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+  store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+  store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+  store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+  store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+  store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+  store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+  store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+  store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+  store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+  store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+  store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+  store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+  store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+  store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+  store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+  store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
   ret void
 }
 
 define void @mul_v64i8() {
 ; CHECK-LABEL: @mul_v64i8(
-; CHECK-NEXT:    [[A0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0), align 1
-; CHECK-NEXT:    [[A1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1), align 1
-; CHECK-NEXT:    [[A2:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2), align 1
-; CHECK-NEXT:    [[A3:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3), align 1
-; CHECK-NEXT:    [[A4:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4), align 1
-; CHECK-NEXT:    [[A5:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5), align 1
-; CHECK-NEXT:    [[A6:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6), align 1
-; CHECK-NEXT:    [[A7:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7), align 1
-; CHECK-NEXT:    [[A8:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8), align 1
-; CHECK-NEXT:    [[A9:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9), align 1
-; CHECK-NEXT:    [[A10:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-; CHECK-NEXT:    [[A11:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-; CHECK-NEXT:    [[A12:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-; CHECK-NEXT:    [[A13:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-; CHECK-NEXT:    [[A14:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-; CHECK-NEXT:    [[A15:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-; CHECK-NEXT:    [[A16:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-; CHECK-NEXT:    [[A17:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-; CHECK-NEXT:    [[A18:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-; CHECK-NEXT:    [[A19:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-; CHECK-NEXT:    [[A20:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-; CHECK-NEXT:    [[A21:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-; CHECK-NEXT:    [[A22:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-; CHECK-NEXT:    [[A23:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-; CHECK-NEXT:    [[A24:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-; CHECK-NEXT:    [[A25:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-; CHECK-NEXT:    [[A26:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-; CHECK-NEXT:    [[A27:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-; CHECK-NEXT:    [[A28:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-; CHECK-NEXT:    [[A29:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-; CHECK-NEXT:    [[A30:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-; CHECK-NEXT:    [[A31:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-; CHECK-NEXT:    [[A32:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-; CHECK-NEXT:    [[A33:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-; CHECK-NEXT:    [[A34:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-; CHECK-NEXT:    [[A35:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-; CHECK-NEXT:    [[A36:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-; CHECK-NEXT:    [[A37:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-; CHECK-NEXT:    [[A38:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-; CHECK-NEXT:    [[A39:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-; CHECK-NEXT:    [[A40:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-; CHECK-NEXT:    [[A41:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-; CHECK-NEXT:    [[A42:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-; CHECK-NEXT:    [[A43:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-; CHECK-NEXT:    [[A44:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-; CHECK-NEXT:    [[A45:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-; CHECK-NEXT:    [[A46:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-; CHECK-NEXT:    [[A47:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-; CHECK-NEXT:    [[A48:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-; CHECK-NEXT:    [[A49:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-; CHECK-NEXT:    [[A50:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-; CHECK-NEXT:    [[A51:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-; CHECK-NEXT:    [[A52:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-; CHECK-NEXT:    [[A53:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-; CHECK-NEXT:    [[A54:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-; CHECK-NEXT:    [[A55:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-; CHECK-NEXT:    [[A56:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-; CHECK-NEXT:    [[A57:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-; CHECK-NEXT:    [[A58:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-; CHECK-NEXT:    [[A59:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-; CHECK-NEXT:    [[A60:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-; CHECK-NEXT:    [[A61:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-; CHECK-NEXT:    [[A62:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-; CHECK-NEXT:    [[A63:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-; CHECK-NEXT:    [[B0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0), align 1
-; CHECK-NEXT:    [[B1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1), align 1
-; CHECK-NEXT:    [[B2:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2), align 1
-; CHECK-NEXT:    [[B3:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3), align 1
-; CHECK-NEXT:    [[B4:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4), align 1
-; CHECK-NEXT:    [[B5:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5), align 1
-; CHECK-NEXT:    [[B6:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6), align 1
-; CHECK-NEXT:    [[B7:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7), align 1
-; CHECK-NEXT:    [[B8:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8), align 1
-; CHECK-NEXT:    [[B9:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9), align 1
-; CHECK-NEXT:    [[B10:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-; CHECK-NEXT:    [[B11:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-; CHECK-NEXT:    [[B12:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-; CHECK-NEXT:    [[B13:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-; CHECK-NEXT:    [[B14:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-; CHECK-NEXT:    [[B15:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-; CHECK-NEXT:    [[B16:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-; CHECK-NEXT:    [[B17:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-; CHECK-NEXT:    [[B18:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-; CHECK-NEXT:    [[B19:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-; CHECK-NEXT:    [[B20:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-; CHECK-NEXT:    [[B21:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-; CHECK-NEXT:    [[B22:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-; CHECK-NEXT:    [[B23:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-; CHECK-NEXT:    [[B24:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-; CHECK-NEXT:    [[B25:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-; CHECK-NEXT:    [[B26:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-; CHECK-NEXT:    [[B27:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-; CHECK-NEXT:    [[B28:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-; CHECK-NEXT:    [[B29:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-; CHECK-NEXT:    [[B30:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-; CHECK-NEXT:    [[B31:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-; CHECK-NEXT:    [[B32:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-; CHECK-NEXT:    [[B33:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-; CHECK-NEXT:    [[B34:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-; CHECK-NEXT:    [[B35:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-; CHECK-NEXT:    [[B36:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-; CHECK-NEXT:    [[B37:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-; CHECK-NEXT:    [[B38:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-; CHECK-NEXT:    [[B39:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-; CHECK-NEXT:    [[B40:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-; CHECK-NEXT:    [[B41:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-; CHECK-NEXT:    [[B42:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-; CHECK-NEXT:    [[B43:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-; CHECK-NEXT:    [[B44:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-; CHECK-NEXT:    [[B45:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-; CHECK-NEXT:    [[B46:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-; CHECK-NEXT:    [[B47:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-; CHECK-NEXT:    [[B48:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-; CHECK-NEXT:    [[B49:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-; CHECK-NEXT:    [[B50:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-; CHECK-NEXT:    [[B51:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-; CHECK-NEXT:    [[B52:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-; CHECK-NEXT:    [[B53:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-; CHECK-NEXT:    [[B54:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-; CHECK-NEXT:    [[B55:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-; CHECK-NEXT:    [[B56:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-; CHECK-NEXT:    [[B57:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-; CHECK-NEXT:    [[B58:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-; CHECK-NEXT:    [[B59:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-; CHECK-NEXT:    [[B60:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-; CHECK-NEXT:    [[B61:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-; CHECK-NEXT:    [[B62:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-; CHECK-NEXT:    [[B63:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+; CHECK-NEXT:    [[A0:%.*]] = load i8, ptr @a8, align 1
+; CHECK-NEXT:    [[A1:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1), align 1
+; CHECK-NEXT:    [[A2:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2), align 1
+; CHECK-NEXT:    [[A3:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3), align 1
+; CHECK-NEXT:    [[A4:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4), align 1
+; CHECK-NEXT:    [[A5:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5), align 1
+; CHECK-NEXT:    [[A6:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6), align 1
+; CHECK-NEXT:    [[A7:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7), align 1
+; CHECK-NEXT:    [[A8:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8), align 1
+; CHECK-NEXT:    [[A9:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9), align 1
+; CHECK-NEXT:    [[A10:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+; CHECK-NEXT:    [[A11:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+; CHECK-NEXT:    [[A12:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+; CHECK-NEXT:    [[A13:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+; CHECK-NEXT:    [[A14:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+; CHECK-NEXT:    [[A15:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+; CHECK-NEXT:    [[A16:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; CHECK-NEXT:    [[A17:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+; CHECK-NEXT:    [[A18:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+; CHECK-NEXT:    [[A19:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+; CHECK-NEXT:    [[A20:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+; CHECK-NEXT:    [[A21:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+; CHECK-NEXT:    [[A22:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+; CHECK-NEXT:    [[A23:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+; CHECK-NEXT:    [[A24:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+; CHECK-NEXT:    [[A25:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+; CHECK-NEXT:    [[A26:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+; CHECK-NEXT:    [[A27:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+; CHECK-NEXT:    [[A28:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+; CHECK-NEXT:    [[A29:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+; CHECK-NEXT:    [[A30:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+; CHECK-NEXT:    [[A31:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+; CHECK-NEXT:    [[A32:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; CHECK-NEXT:    [[A33:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+; CHECK-NEXT:    [[A34:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+; CHECK-NEXT:    [[A35:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+; CHECK-NEXT:    [[A36:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+; CHECK-NEXT:    [[A37:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+; CHECK-NEXT:    [[A38:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+; CHECK-NEXT:    [[A39:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+; CHECK-NEXT:    [[A40:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+; CHECK-NEXT:    [[A41:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+; CHECK-NEXT:    [[A42:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+; CHECK-NEXT:    [[A43:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+; CHECK-NEXT:    [[A44:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+; CHECK-NEXT:    [[A45:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+; CHECK-NEXT:    [[A46:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+; CHECK-NEXT:    [[A47:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+; CHECK-NEXT:    [[A48:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; CHECK-NEXT:    [[A49:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+; CHECK-NEXT:    [[A50:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+; CHECK-NEXT:    [[A51:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+; CHECK-NEXT:    [[A52:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+; CHECK-NEXT:    [[A53:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+; CHECK-NEXT:    [[A54:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+; CHECK-NEXT:    [[A55:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+; CHECK-NEXT:    [[A56:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+; CHECK-NEXT:    [[A57:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+; CHECK-NEXT:    [[A58:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+; CHECK-NEXT:    [[A59:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+; CHECK-NEXT:    [[A60:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+; CHECK-NEXT:    [[A61:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+; CHECK-NEXT:    [[A62:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+; CHECK-NEXT:    [[A63:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+; CHECK-NEXT:    [[B0:%.*]] = load i8, ptr @b8, align 1
+; CHECK-NEXT:    [[B1:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1), align 1
+; CHECK-NEXT:    [[B2:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2), align 1
+; CHECK-NEXT:    [[B3:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3), align 1
+; CHECK-NEXT:    [[B4:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4), align 1
+; CHECK-NEXT:    [[B5:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5), align 1
+; CHECK-NEXT:    [[B6:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6), align 1
+; CHECK-NEXT:    [[B7:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7), align 1
+; CHECK-NEXT:    [[B8:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8), align 1
+; CHECK-NEXT:    [[B9:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9), align 1
+; CHECK-NEXT:    [[B10:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+; CHECK-NEXT:    [[B11:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+; CHECK-NEXT:    [[B12:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+; CHECK-NEXT:    [[B13:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+; CHECK-NEXT:    [[B14:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+; CHECK-NEXT:    [[B15:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+; CHECK-NEXT:    [[B16:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+; CHECK-NEXT:    [[B17:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+; CHECK-NEXT:    [[B18:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+; CHECK-NEXT:    [[B19:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+; CHECK-NEXT:    [[B20:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+; CHECK-NEXT:    [[B21:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+; CHECK-NEXT:    [[B22:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+; CHECK-NEXT:    [[B23:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+; CHECK-NEXT:    [[B24:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+; CHECK-NEXT:    [[B25:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+; CHECK-NEXT:    [[B26:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+; CHECK-NEXT:    [[B27:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+; CHECK-NEXT:    [[B28:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+; CHECK-NEXT:    [[B29:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+; CHECK-NEXT:    [[B30:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+; CHECK-NEXT:    [[B31:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+; CHECK-NEXT:    [[B32:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+; CHECK-NEXT:    [[B33:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+; CHECK-NEXT:    [[B34:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+; CHECK-NEXT:    [[B35:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+; CHECK-NEXT:    [[B36:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+; CHECK-NEXT:    [[B37:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+; CHECK-NEXT:    [[B38:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+; CHECK-NEXT:    [[B39:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+; CHECK-NEXT:    [[B40:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+; CHECK-NEXT:    [[B41:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+; CHECK-NEXT:    [[B42:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+; CHECK-NEXT:    [[B43:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+; CHECK-NEXT:    [[B44:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+; CHECK-NEXT:    [[B45:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+; CHECK-NEXT:    [[B46:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+; CHECK-NEXT:    [[B47:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+; CHECK-NEXT:    [[B48:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+; CHECK-NEXT:    [[B49:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+; CHECK-NEXT:    [[B50:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+; CHECK-NEXT:    [[B51:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+; CHECK-NEXT:    [[B52:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+; CHECK-NEXT:    [[B53:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+; CHECK-NEXT:    [[B54:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+; CHECK-NEXT:    [[B55:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+; CHECK-NEXT:    [[B56:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+; CHECK-NEXT:    [[B57:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+; CHECK-NEXT:    [[B58:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+; CHECK-NEXT:    [[B59:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+; CHECK-NEXT:    [[B60:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+; CHECK-NEXT:    [[B61:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+; CHECK-NEXT:    [[B62:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+; CHECK-NEXT:    [[B63:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
 ; CHECK-NEXT:    [[C0:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[A0]], i8 [[B0]])
 ; CHECK-NEXT:    [[C1:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[A1]], i8 [[B1]])
 ; CHECK-NEXT:    [[C2:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[A2]], i8 [[B2]])
@@ -864,200 +864,200 @@ define void @mul_v64i8() {
 ; CHECK-NEXT:    [[R61:%.*]] = extractvalue { i8, i1 } [[C61]], 0
 ; CHECK-NEXT:    [[R62:%.*]] = extractvalue { i8, i1 } [[C62]], 0
 ; CHECK-NEXT:    [[R63:%.*]] = extractvalue { i8, i1 } [[C63]], 0
-; CHECK-NEXT:    store i8 [[R0]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0), align 1
-; CHECK-NEXT:    store i8 [[R1]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1), align 1
-; CHECK-NEXT:    store i8 [[R2]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2), align 1
-; CHECK-NEXT:    store i8 [[R3]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3), align 1
-; CHECK-NEXT:    store i8 [[R4]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4), align 1
-; CHECK-NEXT:    store i8 [[R5]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5), align 1
-; CHECK-NEXT:    store i8 [[R6]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6), align 1
-; CHECK-NEXT:    store i8 [[R7]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7), align 1
-; CHECK-NEXT:    store i8 [[R8]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8), align 1
-; CHECK-NEXT:    store i8 [[R9]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9), align 1
-; CHECK-NEXT:    store i8 [[R10]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-; CHECK-NEXT:    store i8 [[R11]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-; CHECK-NEXT:    store i8 [[R12]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-; CHECK-NEXT:    store i8 [[R13]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-; CHECK-NEXT:    store i8 [[R14]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-; CHECK-NEXT:    store i8 [[R15]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-; CHECK-NEXT:    store i8 [[R16]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-; CHECK-NEXT:    store i8 [[R17]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-; CHECK-NEXT:    store i8 [[R18]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-; CHECK-NEXT:    store i8 [[R19]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-; CHECK-NEXT:    store i8 [[R20]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-; CHECK-NEXT:    store i8 [[R21]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-; CHECK-NEXT:    store i8 [[R22]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-; CHECK-NEXT:    store i8 [[R23]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-; CHECK-NEXT:    store i8 [[R24]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-; CHECK-NEXT:    store i8 [[R25]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-; CHECK-NEXT:    store i8 [[R26]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-; CHECK-NEXT:    store i8 [[R27]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-; CHECK-NEXT:    store i8 [[R28]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-; CHECK-NEXT:    store i8 [[R29]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-; CHECK-NEXT:    store i8 [[R30]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-; CHECK-NEXT:    store i8 [[R31]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-; CHECK-NEXT:    store i8 [[R32]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-; CHECK-NEXT:    store i8 [[R33]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-; CHECK-NEXT:    store i8 [[R34]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-; CHECK-NEXT:    store i8 [[R35]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-; CHECK-NEXT:    store i8 [[R36]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-; CHECK-NEXT:    store i8 [[R37]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-; CHECK-NEXT:    store i8 [[R38]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-; CHECK-NEXT:    store i8 [[R39]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-; CHECK-NEXT:    store i8 [[R40]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-; CHECK-NEXT:    store i8 [[R41]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-; CHECK-NEXT:    store i8 [[R42]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-; CHECK-NEXT:    store i8 [[R43]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-; CHECK-NEXT:    store i8 [[R44]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-; CHECK-NEXT:    store i8 [[R45]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-; CHECK-NEXT:    store i8 [[R46]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-; CHECK-NEXT:    store i8 [[R47]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-; CHECK-NEXT:    store i8 [[R48]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-; CHECK-NEXT:    store i8 [[R49]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-; CHECK-NEXT:    store i8 [[R50]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-; CHECK-NEXT:    store i8 [[R51]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-; CHECK-NEXT:    store i8 [[R52]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-; CHECK-NEXT:    store i8 [[R53]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-; CHECK-NEXT:    store i8 [[R54]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-; CHECK-NEXT:    store i8 [[R55]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-; CHECK-NEXT:    store i8 [[R56]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-; CHECK-NEXT:    store i8 [[R57]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-; CHECK-NEXT:    store i8 [[R58]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-; CHECK-NEXT:    store i8 [[R59]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-; CHECK-NEXT:    store i8 [[R60]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-; CHECK-NEXT:    store i8 [[R61]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-; CHECK-NEXT:    store i8 [[R62]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-; CHECK-NEXT:    store i8 [[R63]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+; CHECK-NEXT:    store i8 [[R0]], ptr @c8, align 1
+; CHECK-NEXT:    store i8 [[R1]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1), align 1
+; CHECK-NEXT:    store i8 [[R2]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2), align 1
+; CHECK-NEXT:    store i8 [[R3]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3), align 1
+; CHECK-NEXT:    store i8 [[R4]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4), align 1
+; CHECK-NEXT:    store i8 [[R5]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5), align 1
+; CHECK-NEXT:    store i8 [[R6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6), align 1
+; CHECK-NEXT:    store i8 [[R7]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7), align 1
+; CHECK-NEXT:    store i8 [[R8]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8), align 1
+; CHECK-NEXT:    store i8 [[R9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9), align 1
+; CHECK-NEXT:    store i8 [[R10]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+; CHECK-NEXT:    store i8 [[R11]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+; CHECK-NEXT:    store i8 [[R12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+; CHECK-NEXT:    store i8 [[R13]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+; CHECK-NEXT:    store i8 [[R14]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+; CHECK-NEXT:    store i8 [[R15]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+; CHECK-NEXT:    store i8 [[R16]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; CHECK-NEXT:    store i8 [[R17]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+; CHECK-NEXT:    store i8 [[R18]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+; CHECK-NEXT:    store i8 [[R19]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+; CHECK-NEXT:    store i8 [[R20]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+; CHECK-NEXT:    store i8 [[R21]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+; CHECK-NEXT:    store i8 [[R22]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+; CHECK-NEXT:    store i8 [[R23]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+; CHECK-NEXT:    store i8 [[R24]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+; CHECK-NEXT:    store i8 [[R25]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+; CHECK-NEXT:    store i8 [[R26]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+; CHECK-NEXT:    store i8 [[R27]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+; CHECK-NEXT:    store i8 [[R28]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+; CHECK-NEXT:    store i8 [[R29]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+; CHECK-NEXT:    store i8 [[R30]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+; CHECK-NEXT:    store i8 [[R31]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+; CHECK-NEXT:    store i8 [[R32]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; CHECK-NEXT:    store i8 [[R33]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+; CHECK-NEXT:    store i8 [[R34]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+; CHECK-NEXT:    store i8 [[R35]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+; CHECK-NEXT:    store i8 [[R36]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+; CHECK-NEXT:    store i8 [[R37]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+; CHECK-NEXT:    store i8 [[R38]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+; CHECK-NEXT:    store i8 [[R39]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+; CHECK-NEXT:    store i8 [[R40]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+; CHECK-NEXT:    store i8 [[R41]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+; CHECK-NEXT:    store i8 [[R42]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+; CHECK-NEXT:    store i8 [[R43]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+; CHECK-NEXT:    store i8 [[R44]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+; CHECK-NEXT:    store i8 [[R45]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+; CHECK-NEXT:    store i8 [[R46]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+; CHECK-NEXT:    store i8 [[R47]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+; CHECK-NEXT:    store i8 [[R48]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+; CHECK-NEXT:    store i8 [[R49]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+; CHECK-NEXT:    store i8 [[R50]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+; CHECK-NEXT:    store i8 [[R51]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+; CHECK-NEXT:    store i8 [[R52]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+; CHECK-NEXT:    store i8 [[R53]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+; CHECK-NEXT:    store i8 [[R54]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+; CHECK-NEXT:    store i8 [[R55]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+; CHECK-NEXT:    store i8 [[R56]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+; CHECK-NEXT:    store i8 [[R57]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+; CHECK-NEXT:    store i8 [[R58]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+; CHECK-NEXT:    store i8 [[R59]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+; CHECK-NEXT:    store i8 [[R60]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+; CHECK-NEXT:    store i8 [[R61]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+; CHECK-NEXT:    store i8 [[R62]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+; CHECK-NEXT:    store i8 [[R63]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
 ; CHECK-NEXT:    ret void
 ;
-  %a0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
-  %a1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
-  %a2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
-  %a3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
-  %a4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
-  %a5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
-  %a6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
-  %a7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
-  %a8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
-  %a9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
-  %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-  %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-  %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-  %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-  %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-  %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-  %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-  %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-  %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-  %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-  %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-  %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-  %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-  %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-  %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-  %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-  %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-  %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-  %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-  %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-  %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-  %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-  %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-  %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-  %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-  %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-  %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-  %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-  %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-  %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-  %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-  %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-  %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-  %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-  %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-  %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-  %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-  %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-  %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-  %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-  %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-  %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-  %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-  %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-  %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-  %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-  %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-  %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-  %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-  %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-  %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-  %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-  %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-  %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-  %b0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
-  %b1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
-  %b2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
-  %b3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
-  %b4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
-  %b5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
-  %b6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
-  %b7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
-  %b8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
-  %b9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
-  %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-  %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-  %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-  %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-  %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-  %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-  %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-  %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-  %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-  %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-  %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-  %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-  %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-  %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-  %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-  %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-  %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-  %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-  %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-  %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-  %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-  %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-  %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-  %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-  %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-  %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-  %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-  %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-  %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-  %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-  %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-  %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-  %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-  %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-  %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-  %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-  %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-  %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-  %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-  %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-  %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-  %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-  %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-  %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-  %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-  %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-  %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-  %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-  %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-  %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-  %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-  %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-  %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-  %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+  %a0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+  %a1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+  %a2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+  %a3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+  %a4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+  %a5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+  %a6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+  %a7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+  %a8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+  %a9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+  %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+  %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+  %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+  %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+  %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+  %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+  %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+  %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+  %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+  %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+  %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+  %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+  %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+  %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+  %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+  %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+  %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+  %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+  %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+  %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+  %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+  %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+  %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+  %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+  %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+  %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+  %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+  %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+  %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+  %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+  %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+  %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+  %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+  %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+  %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+  %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+  %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+  %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+  %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+  %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+  %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+  %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+  %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+  %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+  %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+  %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+  %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+  %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+  %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+  %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+  %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+  %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+  %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+  %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+  %b0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+  %b1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+  %b2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+  %b3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+  %b4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+  %b5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+  %b6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+  %b7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+  %b8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+  %b9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+  %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+  %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+  %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+  %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+  %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+  %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+  %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+  %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+  %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+  %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+  %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+  %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+  %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+  %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+  %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+  %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+  %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+  %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+  %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+  %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+  %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+  %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+  %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+  %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+  %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+  %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+  %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+  %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+  %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+  %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+  %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+  %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+  %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+  %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+  %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+  %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+  %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+  %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+  %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+  %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+  %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+  %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+  %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+  %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+  %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+  %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+  %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+  %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+  %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+  %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+  %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+  %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+  %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+  %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
   %c0  = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %a0 , i8 %b0 )
   %c1  = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %a1 , i8 %b1 )
   %c2  = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %a2 , i8 %b2 )
@@ -1186,69 +1186,69 @@ define void @mul_v64i8() {
   %r61 = extractvalue {i8, i1} %c61, 0
   %r62 = extractvalue {i8, i1} %c62, 0
   %r63 = extractvalue {i8, i1} %c63, 0
-  store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
-  store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
-  store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
-  store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
-  store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
-  store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
-  store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
-  store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
-  store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
-  store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
-  store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-  store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-  store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-  store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-  store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-  store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-  store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-  store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-  store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-  store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-  store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-  store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-  store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-  store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-  store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-  store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-  store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-  store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-  store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-  store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-  store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-  store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-  store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-  store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-  store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-  store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-  store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-  store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-  store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-  store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-  store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-  store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-  store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-  store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-  store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-  store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-  store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-  store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-  store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-  store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-  store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-  store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-  store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-  store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-  store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-  store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-  store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-  store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-  store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-  store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-  store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-  store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-  store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-  store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+  store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+  store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+  store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+  store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+  store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+  store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+  store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+  store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+  store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+  store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+  store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+  store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+  store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+  store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+  store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+  store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+  store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+  store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+  store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+  store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+  store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+  store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+  store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+  store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+  store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+  store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+  store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+  store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+  store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+  store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+  store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+  store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+  store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+  store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+  store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+  store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+  store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+  store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+  store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+  store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+  store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+  store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+  store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+  store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+  store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+  store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+  store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+  store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+  store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+  store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+  store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+  store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+  store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+  store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+  store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+  store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+  store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+  store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+  store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+  store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+  store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+  store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+  store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+  store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
   ret void
 }

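The pattern in these hunks is mechanical: every typed pointer constant expression is rewritten to an untyped `ptr`, and a `getelementptr` whose indices are all zero folds away entirely, so the access goes directly through the global. A minimal before/after sketch, for illustration only (not part of the patch), using the `@a64` global from the test below:

  ; typed-pointer (legacy) form:
  %x = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8

  ; opaque-pointer form: the all-zero GEP folds to the global itself
  %x = load i64, ptr @a64, align 8

The FileCheck lines (`; SSE-NEXT:` etc.) appear to have been regenerated to match, which is why index-zero accesses collapse to a bare `ptr @a64` while non-zero indices keep their `getelementptr inbounds` form, and the vector `bitcast` constant expressions disappear entirely.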
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-mul.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-mul.ll
index 5f2193df03266..94976a8cdee25 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-mul.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-mul.ll
@@ -24,22 +24,22 @@
 
 define void @mul_v8i64() {
 ; SSE-LABEL: @mul_v8i64(
-; SSE-NEXT:    [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-; SSE-NEXT:    [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-; SSE-NEXT:    [[A2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-; SSE-NEXT:    [[A3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-; SSE-NEXT:    [[A4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-; SSE-NEXT:    [[A5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-; SSE-NEXT:    [[A6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-; SSE-NEXT:    [[A7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-; SSE-NEXT:    [[B0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-; SSE-NEXT:    [[B1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-; SSE-NEXT:    [[B2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-; SSE-NEXT:    [[B3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-; SSE-NEXT:    [[B4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-; SSE-NEXT:    [[B5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-; SSE-NEXT:    [[B6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-; SSE-NEXT:    [[B7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+; SSE-NEXT:    [[A0:%.*]] = load i64, ptr @a64, align 8
+; SSE-NEXT:    [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+; SSE-NEXT:    [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+; SSE-NEXT:    [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+; SSE-NEXT:    [[B0:%.*]] = load i64, ptr @b64, align 8
+; SSE-NEXT:    [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+; SSE-NEXT:    [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+; SSE-NEXT:    [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
 ; SSE-NEXT:    [[R0:%.*]] = mul i64 [[A0]], [[B0]]
 ; SSE-NEXT:    [[R1:%.*]] = mul i64 [[A1]], [[B1]]
 ; SSE-NEXT:    [[R2:%.*]] = mul i64 [[A2]], [[B2]]
@@ -48,33 +48,33 @@ define void @mul_v8i64() {
 ; SSE-NEXT:    [[R5:%.*]] = mul i64 [[A5]], [[B5]]
 ; SSE-NEXT:    [[R6:%.*]] = mul i64 [[A6]], [[B6]]
 ; SSE-NEXT:    [[R7:%.*]] = mul i64 [[A7]], [[B7]]
-; SSE-NEXT:    store i64 [[R0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-; SSE-NEXT:    store i64 [[R1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-; SSE-NEXT:    store i64 [[R2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-; SSE-NEXT:    store i64 [[R3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-; SSE-NEXT:    store i64 [[R4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-; SSE-NEXT:    store i64 [[R5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-; SSE-NEXT:    store i64 [[R6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-; SSE-NEXT:    store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+; SSE-NEXT:    store i64 [[R0]], ptr @c64, align 8
+; SSE-NEXT:    store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+; SSE-NEXT:    store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SSE-NEXT:    store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+; SSE-NEXT:    store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SSE-NEXT:    store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+; SSE-NEXT:    store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+; SSE-NEXT:    store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @mul_v8i64(
-; SLM-NEXT:    [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-; SLM-NEXT:    [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-; SLM-NEXT:    [[A2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-; SLM-NEXT:    [[A3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-; SLM-NEXT:    [[A4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-; SLM-NEXT:    [[A5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-; SLM-NEXT:    [[A6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-; SLM-NEXT:    [[A7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-; SLM-NEXT:    [[B0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-; SLM-NEXT:    [[B1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-; SLM-NEXT:    [[B2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-; SLM-NEXT:    [[B3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-; SLM-NEXT:    [[B4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-; SLM-NEXT:    [[B5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-; SLM-NEXT:    [[B6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-; SLM-NEXT:    [[B7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+; SLM-NEXT:    [[A0:%.*]] = load i64, ptr @a64, align 8
+; SLM-NEXT:    [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+; SLM-NEXT:    [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SLM-NEXT:    [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+; SLM-NEXT:    [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SLM-NEXT:    [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+; SLM-NEXT:    [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SLM-NEXT:    [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+; SLM-NEXT:    [[B0:%.*]] = load i64, ptr @b64, align 8
+; SLM-NEXT:    [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+; SLM-NEXT:    [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+; SLM-NEXT:    [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+; SLM-NEXT:    [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+; SLM-NEXT:    [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+; SLM-NEXT:    [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+; SLM-NEXT:    [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
 ; SLM-NEXT:    [[R0:%.*]] = mul i64 [[A0]], [[B0]]
 ; SLM-NEXT:    [[R1:%.*]] = mul i64 [[A1]], [[B1]]
 ; SLM-NEXT:    [[R2:%.*]] = mul i64 [[A2]], [[B2]]
@@ -83,69 +83,69 @@ define void @mul_v8i64() {
 ; SLM-NEXT:    [[R5:%.*]] = mul i64 [[A5]], [[B5]]
 ; SLM-NEXT:    [[R6:%.*]] = mul i64 [[A6]], [[B6]]
 ; SLM-NEXT:    [[R7:%.*]] = mul i64 [[A7]], [[B7]]
-; SLM-NEXT:    store i64 [[R0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-; SLM-NEXT:    store i64 [[R1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-; SLM-NEXT:    store i64 [[R2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-; SLM-NEXT:    store i64 [[R3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-; SLM-NEXT:    store i64 [[R4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-; SLM-NEXT:    store i64 [[R5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-; SLM-NEXT:    store i64 [[R6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-; SLM-NEXT:    store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+; SLM-NEXT:    store i64 [[R0]], ptr @c64, align 8
+; SLM-NEXT:    store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+; SLM-NEXT:    store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SLM-NEXT:    store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+; SLM-NEXT:    store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SLM-NEXT:    store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+; SLM-NEXT:    store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+; SLM-NEXT:    store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
 ; SLM-NEXT:    ret void
 ;
 ; AVX128-LABEL: @mul_v8i64(
-; AVX128-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
-; AVX128-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
+; AVX128-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
+; AVX128-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
 ; AVX128-NEXT:    [[TMP3:%.*]] = mul <2 x i64> [[TMP1]], [[TMP2]]
-; AVX128-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
-; AVX128-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
-; AVX128-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
+; AVX128-NEXT:    store <2 x i64> [[TMP3]], ptr @c64, align 8
+; AVX128-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; AVX128-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
 ; AVX128-NEXT:    [[TMP6:%.*]] = mul <2 x i64> [[TMP4]], [[TMP5]]
-; AVX128-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
-; AVX128-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
-; AVX128-NEXT:    [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
+; AVX128-NEXT:    store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; AVX128-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX128-NEXT:    [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX128-NEXT:    [[TMP9:%.*]] = mul <2 x i64> [[TMP7]], [[TMP8]]
-; AVX128-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
-; AVX128-NEXT:    [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
-; AVX128-NEXT:    [[TMP11:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
+; AVX128-NEXT:    store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; AVX128-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; AVX128-NEXT:    [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
 ; AVX128-NEXT:    [[TMP12:%.*]] = mul <2 x i64> [[TMP10]], [[TMP11]]
-; AVX128-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
+; AVX128-NEXT:    store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
 ; AVX128-NEXT:    ret void
 ;
 ; AVX256-LABEL: @mul_v8i64(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
-; AVX256-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; AVX256-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
 ; AVX256-NEXT:    [[TMP3:%.*]] = mul <4 x i64> [[TMP1]], [[TMP2]]
-; AVX256-NEXT:    store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
-; AVX256-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
-; AVX256-NEXT:    [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX256-NEXT:    store <4 x i64> [[TMP3]], ptr @c64, align 8
+; AVX256-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX256-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX256-NEXT:    [[TMP6:%.*]] = mul <4 x i64> [[TMP4]], [[TMP5]]
-; AVX256-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX256-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @mul_v8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
+; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
 ; AVX512-NEXT:    [[TMP3:%.*]] = mul <8 x i64> [[TMP1]], [[TMP2]]
-; AVX512-NEXT:    store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
+; AVX512-NEXT:    store <8 x i64> [[TMP3]], ptr @c64, align 8
 ; AVX512-NEXT:    ret void
 ;
-  %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-  %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-  %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-  %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-  %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-  %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-  %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-  %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-  %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-  %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-  %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-  %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-  %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-  %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-  %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-  %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+  %a0 = load i64, ptr @a64, align 8
+  %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+  %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+  %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+  %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+  %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+  %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+  %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+  %b0 = load i64, ptr @b64, align 8
+  %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+  %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+  %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+  %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+  %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+  %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+  %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
   %r0 = mul i64 %a0, %b0
   %r1 = mul i64 %a1, %b1
   %r2 = mul i64 %a2, %b2
@@ -154,125 +154,125 @@ define void @mul_v8i64() {
   %r5 = mul i64 %a5, %b5
   %r6 = mul i64 %a6, %b6
   %r7 = mul i64 %a7, %b7
-  store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-  store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-  store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-  store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-  store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-  store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-  store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-  store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+  store i64 %r0, ptr @c64, align 8
+  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @mul_v16i32() {
 ; SSE-LABEL: @mul_v16i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[TMP2]]
-; SSE-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP3]], ptr @c32, align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = mul <4 x i32> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP9:%.*]] = mul <4 x i32> [[TMP7]], [[TMP8]]
-; SSE-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP12:%.*]] = mul <4 x i32> [[TMP10]], [[TMP11]]
-; SSE-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @mul_v16i32(
-; SLM-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
+; SLM-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
+; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
 ; SLM-NEXT:    [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[TMP2]]
-; SLM-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP3]], ptr @c32, align 4
+; SLM-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SLM-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
 ; SLM-NEXT:    [[TMP6:%.*]] = mul <4 x i32> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SLM-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SLM-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; SLM-NEXT:    [[TMP9:%.*]] = mul <4 x i32> [[TMP7]], [[TMP8]]
-; SLM-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SLM-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SLM-NEXT:    [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
 ; SLM-NEXT:    [[TMP12:%.*]] = mul <4 x i32> [[TMP10]], [[TMP11]]
-; SLM-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SLM-NEXT:    ret void
 ;
 ; AVX128-LABEL: @mul_v16i32(
-; AVX128-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
-; AVX128-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
+; AVX128-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
+; AVX128-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
 ; AVX128-NEXT:    [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[TMP2]]
-; AVX128-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; AVX128-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
-; AVX128-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
+; AVX128-NEXT:    store <4 x i32> [[TMP3]], ptr @c32, align 4
+; AVX128-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; AVX128-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
 ; AVX128-NEXT:    [[TMP6:%.*]] = mul <4 x i32> [[TMP4]], [[TMP5]]
-; AVX128-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; AVX128-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
-; AVX128-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
+; AVX128-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; AVX128-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; AVX128-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; AVX128-NEXT:    [[TMP9:%.*]] = mul <4 x i32> [[TMP7]], [[TMP8]]
-; AVX128-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; AVX128-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
-; AVX128-NEXT:    [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
+; AVX128-NEXT:    store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; AVX128-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; AVX128-NEXT:    [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
 ; AVX128-NEXT:    [[TMP12:%.*]] = mul <4 x i32> [[TMP10]], [[TMP11]]
-; AVX128-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; AVX128-NEXT:    store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; AVX128-NEXT:    ret void
 ;
 ; AVX256-LABEL: @mul_v16i32(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
-; AVX256-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
+; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
+; AVX256-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
 ; AVX256-NEXT:    [[TMP3:%.*]] = mul <8 x i32> [[TMP1]], [[TMP2]]
-; AVX256-NEXT:    store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; AVX256-NEXT:    [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
-; AVX256-NEXT:    [[TMP5:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX256-NEXT:    store <8 x i32> [[TMP3]], ptr @c32, align 4
+; AVX256-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; AVX256-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; AVX256-NEXT:    [[TMP6:%.*]] = mul <8 x i32> [[TMP4]], [[TMP5]]
-; AVX256-NEXT:    store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX256-NEXT:    store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @mul_v16i32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @b32 to <16 x i32>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
 ; AVX512-NEXT:    [[TMP3:%.*]] = mul <16 x i32> [[TMP1]], [[TMP2]]
-; AVX512-NEXT:    store <16 x i32> [[TMP3]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4
+; AVX512-NEXT:    store <16 x i32> [[TMP3]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-  %b0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
-  %b1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
-  %b2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
-  %b3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
-  %b4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
-  %b5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
-  %b6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
-  %b7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
-  %b8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
-  %b9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
-  %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-  %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-  %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-  %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-  %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-  %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+  %b0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+  %b1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+  %b2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+  %b3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+  %b4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+  %b5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+  %b6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+  %b7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+  %b8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+  %b9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+  %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+  %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+  %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+  %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+  %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+  %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
   %r0  = mul i32 %a0 , %b0
   %r1  = mul i32 %a1 , %b1
   %r2  = mul i32 %a2 , %b2
@@ -289,165 +289,165 @@ define void @mul_v16i32() {
   %r13 = mul i32 %a13, %b13
   %r14 = mul i32 %a14, %b14
   %r15 = mul i32 %a15, %b15
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @mul_v32i16() {
 ; SSE-LABEL: @mul_v32i16(
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
 ; SSE-NEXT:    [[TMP3:%.*]] = mul <8 x i16> [[TMP1]], [[TMP2]]
-; SSE-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
 ; SSE-NEXT:    [[TMP6:%.*]] = mul <8 x i16> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; SSE-NEXT:    [[TMP9:%.*]] = mul <8 x i16> [[TMP7]], [[TMP8]]
-; SSE-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
 ; SSE-NEXT:    [[TMP12:%.*]] = mul <8 x i16> [[TMP10]], [[TMP11]]
-; SSE-NEXT:    store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @mul_v32i16(
-; SLM-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SLM-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
 ; SLM-NEXT:    [[TMP3:%.*]] = mul <8 x i16> [[TMP1]], [[TMP2]]
-; SLM-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SLM-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SLM-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
 ; SLM-NEXT:    [[TMP6:%.*]] = mul <8 x i16> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SLM-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SLM-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; SLM-NEXT:    [[TMP9:%.*]] = mul <8 x i16> [[TMP7]], [[TMP8]]
-; SLM-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SLM-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SLM-NEXT:    [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
 ; SLM-NEXT:    [[TMP12:%.*]] = mul <8 x i16> [[TMP10]], [[TMP11]]
-; SLM-NEXT:    store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SLM-NEXT:    ret void
 ;
 ; AVX128-LABEL: @mul_v32i16(
-; AVX128-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
-; AVX128-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; AVX128-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; AVX128-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
 ; AVX128-NEXT:    [[TMP3:%.*]] = mul <8 x i16> [[TMP1]], [[TMP2]]
-; AVX128-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; AVX128-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
-; AVX128-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; AVX128-NEXT:    store <8 x i16> [[TMP3]], ptr @c16, align 2
+; AVX128-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; AVX128-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
 ; AVX128-NEXT:    [[TMP6:%.*]] = mul <8 x i16> [[TMP4]], [[TMP5]]
-; AVX128-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; AVX128-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
-; AVX128-NEXT:    [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; AVX128-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; AVX128-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; AVX128-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; AVX128-NEXT:    [[TMP9:%.*]] = mul <8 x i16> [[TMP7]], [[TMP8]]
-; AVX128-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; AVX128-NEXT:    [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
-; AVX128-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; AVX128-NEXT:    store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; AVX128-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; AVX128-NEXT:    [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
 ; AVX128-NEXT:    [[TMP12:%.*]] = mul <8 x i16> [[TMP10]], [[TMP11]]
-; AVX128-NEXT:    store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; AVX128-NEXT:    store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; AVX128-NEXT:    ret void
 ;
 ; AVX256-LABEL: @mul_v32i16(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
-; AVX256-NEXT:    [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
+; AVX256-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
+; AVX256-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
 ; AVX256-NEXT:    [[TMP3:%.*]] = mul <16 x i16> [[TMP1]], [[TMP2]]
-; AVX256-NEXT:    store <16 x i16> [[TMP3]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
-; AVX256-NEXT:    [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
-; AVX256-NEXT:    [[TMP5:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX256-NEXT:    store <16 x i16> [[TMP3]], ptr @c16, align 2
+; AVX256-NEXT:    [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; AVX256-NEXT:    [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; AVX256-NEXT:    [[TMP6:%.*]] = mul <16 x i16> [[TMP4]], [[TMP5]]
-; AVX256-NEXT:    store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX256-NEXT:    store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @mul_v32i16(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @a16 to <32 x i16>*), align 2
-; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @b16 to <32 x i16>*), align 2
+; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
+; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
 ; AVX512-NEXT:    [[TMP3:%.*]] = mul <32 x i16> [[TMP1]], [[TMP2]]
-; AVX512-NEXT:    store <32 x i16> [[TMP3]], <32 x i16>* bitcast ([32 x i16]* @c16 to <32 x i16>*), align 2
+; AVX512-NEXT:    store <32 x i16> [[TMP3]], ptr @c16, align 2
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
-  %a1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
-  %a2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
-  %a3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
-  %a4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
-  %a5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
-  %a6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
-  %a7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
-  %a8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
-  %a9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
-  %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-  %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-  %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-  %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-  %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-  %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-  %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-  %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-  %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-  %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-  %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-  %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-  %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-  %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-  %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-  %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-  %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-  %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-  %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-  %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-  %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-  %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-  %b0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
-  %b1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
-  %b2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
-  %b3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
-  %b4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
-  %b5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
-  %b6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
-  %b7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
-  %b8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
-  %b9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
-  %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-  %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-  %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-  %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-  %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-  %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-  %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-  %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-  %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-  %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-  %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-  %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-  %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-  %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-  %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-  %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-  %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-  %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-  %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-  %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-  %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-  %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+  %a0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+  %a1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+  %a2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+  %a3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+  %a4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+  %a5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+  %a6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+  %a7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+  %a8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+  %a9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+  %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+  %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+  %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+  %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+  %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+  %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+  %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+  %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+  %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+  %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+  %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+  %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+  %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+  %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+  %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+  %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+  %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+  %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+  %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+  %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+  %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+  %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+  %b0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+  %b1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+  %b2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+  %b3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+  %b4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+  %b5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+  %b6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+  %b7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+  %b8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+  %b9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+  %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+  %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+  %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+  %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+  %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+  %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+  %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+  %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+  %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+  %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+  %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+  %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+  %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+  %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+  %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+  %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+  %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+  %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+  %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+  %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+  %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+  %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
   %r0  = mul i16 %a0 , %b0
   %r1  = mul i16 %a1 , %b1
   %r2  = mul i16 %a2 , %b2
@@ -480,245 +480,245 @@ define void @mul_v32i16() {
   %r29 = mul i16 %a29, %b29
   %r30 = mul i16 %a30, %b30
   %r31 = mul i16 %a31, %b31
-  store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
-  store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
-  store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
-  store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
-  store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
-  store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
-  store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
-  store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
-  store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
-  store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
-  store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-  store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-  store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-  store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-  store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-  store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-  store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-  store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-  store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-  store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-  store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-  store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-  store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-  store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-  store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-  store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-  store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-  store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-  store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-  store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-  store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-  store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+  store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+  store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+  store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+  store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+  store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+  store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+  store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+  store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+  store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+  store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+  store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+  store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+  store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+  store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+  store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+  store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+  store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+  store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+  store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+  store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+  store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+  store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+  store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+  store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+  store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+  store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+  store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+  store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+  store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+  store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+  store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+  store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
   ret void
 }
 
 define void @mul_v64i8() {
 ; SSE-LABEL: @mul_v64i8(
-; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = mul <16 x i8> [[TMP1]], [[TMP2]]
-; SSE-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
 ; SSE-NEXT:    [[TMP6:%.*]] = mul <16 x i8> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; SSE-NEXT:    [[TMP9:%.*]] = mul <16 x i8> [[TMP7]], [[TMP8]]
-; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
 ; SSE-NEXT:    [[TMP12:%.*]] = mul <16 x i8> [[TMP10]], [[TMP11]]
-; SSE-NEXT:    store <16 x i8> [[TMP9]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @mul_v64i8(
-; SLM-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
+; SLM-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = mul <16 x i8> [[TMP1]], [[TMP2]]
-; SLM-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SLM-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SLM-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
 ; SLM-NEXT:    [[TMP6:%.*]] = mul <16 x i8> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SLM-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SLM-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; SLM-NEXT:    [[TMP9:%.*]] = mul <16 x i8> [[TMP7]], [[TMP8]]
-; SLM-NEXT:    [[TMP10:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
+; SLM-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SLM-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
 ; SLM-NEXT:    [[TMP12:%.*]] = mul <16 x i8> [[TMP10]], [[TMP11]]
-; SLM-NEXT:    store <16 x i8> [[TMP9]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SLM-NEXT:    store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SLM-NEXT:    ret void
 ;
 ; AVX128-LABEL: @mul_v64i8(
-; AVX128-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
-; AVX128-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
+; AVX128-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; AVX128-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
 ; AVX128-NEXT:    [[TMP3:%.*]] = mul <16 x i8> [[TMP1]], [[TMP2]]
-; AVX128-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; AVX128-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
-; AVX128-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
+; AVX128-NEXT:    store <16 x i8> [[TMP3]], ptr @c8, align 1
+; AVX128-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; AVX128-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
 ; AVX128-NEXT:    [[TMP6:%.*]] = mul <16 x i8> [[TMP4]], [[TMP5]]
-; AVX128-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; AVX128-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
-; AVX128-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
+; AVX128-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; AVX128-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; AVX128-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; AVX128-NEXT:    [[TMP9:%.*]] = mul <16 x i8> [[TMP7]], [[TMP8]]
-; AVX128-NEXT:    [[TMP10:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
-; AVX128-NEXT:    [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
+; AVX128-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; AVX128-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
 ; AVX128-NEXT:    [[TMP12:%.*]] = mul <16 x i8> [[TMP10]], [[TMP11]]
-; AVX128-NEXT:    store <16 x i8> [[TMP9]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; AVX128-NEXT:    store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; AVX128-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; AVX128-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; AVX128-NEXT:    ret void
 ;
 ; AVX256-LABEL: @mul_v64i8(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @a8 to <32 x i8>*), align 1
-; AVX256-NEXT:    [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @b8 to <32 x i8>*), align 1
+; AVX256-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
+; AVX256-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
 ; AVX256-NEXT:    [[TMP3:%.*]] = mul <32 x i8> [[TMP1]], [[TMP2]]
-; AVX256-NEXT:    store <32 x i8> [[TMP3]], <32 x i8>* bitcast ([64 x i8]* @c8 to <32 x i8>*), align 1
-; AVX256-NEXT:    [[TMP4:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <32 x i8>*), align 1
-; AVX256-NEXT:    [[TMP5:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX256-NEXT:    store <32 x i8> [[TMP3]], ptr @c8, align 1
+; AVX256-NEXT:    [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; AVX256-NEXT:    [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; AVX256-NEXT:    [[TMP6:%.*]] = mul <32 x i8> [[TMP4]], [[TMP5]]
-; AVX256-NEXT:    store <32 x i8> [[TMP6]], <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX256-NEXT:    store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @mul_v64i8(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @a8 to <64 x i8>*), align 1
-; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @b8 to <64 x i8>*), align 1
+; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
+; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
 ; AVX512-NEXT:    [[TMP3:%.*]] = mul <64 x i8> [[TMP1]], [[TMP2]]
-; AVX512-NEXT:    store <64 x i8> [[TMP3]], <64 x i8>* bitcast ([64 x i8]* @c8 to <64 x i8>*), align 1
+; AVX512-NEXT:    store <64 x i8> [[TMP3]], ptr @c8, align 1
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
-  %a1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
-  %a2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
-  %a3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
-  %a4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
-  %a5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
-  %a6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
-  %a7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
-  %a8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
-  %a9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
-  %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-  %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-  %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-  %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-  %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-  %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-  %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-  %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-  %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-  %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-  %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-  %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-  %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-  %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-  %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-  %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-  %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-  %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-  %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-  %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-  %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-  %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-  %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-  %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-  %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-  %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-  %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-  %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-  %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-  %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-  %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-  %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-  %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-  %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-  %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-  %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-  %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-  %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-  %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-  %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-  %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-  %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-  %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-  %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-  %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-  %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-  %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-  %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-  %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-  %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-  %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-  %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-  %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-  %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-  %b0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
-  %b1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
-  %b2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
-  %b3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
-  %b4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
-  %b5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
-  %b6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
-  %b7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
-  %b8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
-  %b9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
-  %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-  %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-  %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-  %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-  %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-  %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-  %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-  %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-  %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-  %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-  %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-  %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-  %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-  %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-  %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-  %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-  %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-  %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-  %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-  %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-  %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-  %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-  %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-  %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-  %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-  %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-  %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-  %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-  %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-  %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-  %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-  %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-  %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-  %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-  %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-  %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-  %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-  %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-  %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-  %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-  %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-  %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-  %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-  %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-  %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-  %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-  %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-  %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-  %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-  %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-  %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-  %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-  %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-  %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+  %a0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+  %a1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+  %a2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+  %a3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+  %a4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+  %a5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+  %a6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+  %a7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+  %a8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+  %a9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+  %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+  %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+  %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+  %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+  %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+  %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+  %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+  %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+  %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+  %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+  %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+  %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+  %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+  %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+  %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+  %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+  %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+  %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+  %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+  %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+  %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+  %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+  %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+  %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+  %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+  %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+  %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+  %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+  %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+  %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+  %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+  %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+  %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+  %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+  %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+  %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+  %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+  %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+  %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+  %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+  %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+  %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+  %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+  %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+  %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+  %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+  %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+  %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+  %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+  %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+  %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+  %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+  %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+  %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+  %b0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+  %b1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+  %b2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+  %b3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+  %b4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+  %b5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+  %b6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+  %b7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+  %b8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+  %b9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+  %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+  %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+  %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+  %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+  %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+  %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+  %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+  %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+  %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+  %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+  %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+  %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+  %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+  %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+  %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+  %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+  %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+  %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+  %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+  %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+  %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+  %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+  %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+  %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+  %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+  %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+  %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+  %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+  %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+  %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+  %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+  %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+  %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+  %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+  %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+  %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+  %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+  %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+  %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+  %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+  %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+  %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+  %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+  %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+  %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+  %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+  %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+  %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+  %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+  %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+  %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+  %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+  %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+  %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
   %r0  = mul i8 %a0 , %b0
   %r1  = mul i8 %a1 , %b1
   %r2  = mul i8 %a2 , %b2
@@ -783,69 +783,69 @@ define void @mul_v64i8() {
   %r61 = mul i8 %a61, %b61
   %r62 = mul i8 %a62, %b62
   %r63 = mul i8 %a63, %b63
-  store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
-  store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
-  store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
-  store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
-  store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
-  store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
-  store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
-  store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
-  store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
-  store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
-  store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-  store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-  store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-  store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-  store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-  store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-  store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-  store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-  store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-  store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-  store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-  store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-  store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-  store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-  store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-  store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-  store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-  store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-  store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-  store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-  store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-  store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-  store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-  store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-  store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-  store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-  store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-  store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-  store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-  store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-  store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-  store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-  store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-  store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-  store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-  store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-  store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-  store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-  store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-  store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-  store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-  store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-  store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-  store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-  store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-  store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-  store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-  store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-  store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-  store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-  store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-  store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-  store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-  store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+  store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+  store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+  store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+  store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+  store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+  store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+  store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+  store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+  store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+  store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+  store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+  store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+  store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+  store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+  store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+  store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+  store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+  store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+  store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+  store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+  store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+  store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+  store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+  store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+  store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+  store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+  store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+  store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+  store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+  store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+  store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+  store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+  store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+  store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+  store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+  store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+  store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+  store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+  store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+  store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+  store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+  store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+  store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+  store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+  store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+  store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+  store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+  store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+  store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+  store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+  store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+  store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+  store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+  store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+  store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+  store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+  store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+  store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+  store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+  store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+  store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+  store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+  store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+  store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
   ret void
 }

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-smax.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-smax.ll
index 679e80124013b..c63b672f4187c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-smax.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-smax.ll
@@ -27,58 +27,58 @@ declare i8  @llvm.smax.i8 (i8 , i8 )
 
 define void @smax_v8i64() {
 ; SSE-LABEL: @smax_v8i64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
+; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
 ; SSE-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
-; SSE-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP3]], ptr @c64, align 8
+; SSE-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP6:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]])
-; SSE-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; SSE-NEXT:    [[TMP9:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]])
-; SSE-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP11:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
 ; SSE-NEXT:    [[TMP12:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]])
-; SSE-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @smax_v8i64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
 ; AVX-NEXT:    [[TMP3:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]])
-; AVX-NEXT:    store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP3]], ptr @c64, align 8
+; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX-NEXT:    [[TMP6:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]])
-; AVX-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @smax_v8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
+; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.smax.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]])
-; AVX512-NEXT:    store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
+; AVX512-NEXT:    store <8 x i64> [[TMP3]], ptr @c64, align 8
 ; AVX512-NEXT:    ret void
 ;
-  %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-  %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-  %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-  %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-  %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-  %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-  %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-  %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-  %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-  %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-  %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-  %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-  %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-  %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-  %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-  %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+  %a0 = load i64, ptr @a64, align 8
+  %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+  %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+  %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+  %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+  %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+  %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+  %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+  %b0 = load i64, ptr @b64, align 8
+  %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+  %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+  %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+  %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+  %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+  %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+  %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
   %r0 = call i64 @llvm.smax.i64(i64 %a0, i64 %b0)
   %r1 = call i64 @llvm.smax.i64(i64 %a1, i64 %b1)
   %r2 = call i64 @llvm.smax.i64(i64 %a2, i64 %b2)
@@ -87,87 +87,87 @@ define void @smax_v8i64() {
   %r5 = call i64 @llvm.smax.i64(i64 %a5, i64 %b5)
   %r6 = call i64 @llvm.smax.i64(i64 %a6, i64 %b6)
   %r7 = call i64 @llvm.smax.i64(i64 %a7, i64 %b7)
-  store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-  store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-  store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-  store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-  store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-  store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-  store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-  store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+  store i64 %r0, ptr @c64, align 8
+  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @smax_v16i32() {
 ; SSE-LABEL: @smax_v16i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-; SSE-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP3]], ptr @c32, align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]])
-; SSE-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP9:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]])
-; SSE-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP12:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]])
-; SSE-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @smax_v16i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
 ; AVX-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
-; AVX-NEXT:    store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP3]], ptr @c32, align 4
+; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; AVX-NEXT:    [[TMP6:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[TMP4]], <8 x i32> [[TMP5]])
-; AVX-NEXT:    store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @smax_v16i32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @b32 to <16 x i32>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <16 x i32> @llvm.smax.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
-; AVX512-NEXT:    store <16 x i32> [[TMP3]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4
+; AVX512-NEXT:    store <16 x i32> [[TMP3]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-  %b0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
-  %b1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
-  %b2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
-  %b3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
-  %b4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
-  %b5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
-  %b6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
-  %b7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
-  %b8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
-  %b9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
-  %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-  %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-  %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-  %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-  %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-  %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+  %b0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+  %b1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+  %b2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+  %b3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+  %b4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+  %b5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+  %b6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+  %b7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+  %b8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+  %b9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+  %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+  %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+  %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+  %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+  %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+  %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
   %r0  = call i32 @llvm.smax.i32(i32 %a0 , i32 %b0 )
   %r1  = call i32 @llvm.smax.i32(i32 %a1 , i32 %b1 )
   %r2  = call i32 @llvm.smax.i32(i32 %a2 , i32 %b2 )
@@ -184,127 +184,127 @@ define void @smax_v16i32() {
   %r13 = call i32 @llvm.smax.i32(i32 %a13, i32 %b13)
   %r14 = call i32 @llvm.smax.i32(i32 %a14, i32 %b14)
   %r15 = call i32 @llvm.smax.i32(i32 %a15, i32 %b15)
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @smax_v32i16() {
 ; SSE-LABEL: @smax_v32i16(
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
 ; SSE-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-; SSE-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
 ; SSE-NEXT:    [[TMP6:%.*]] = call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]])
-; SSE-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; SSE-NEXT:    [[TMP9:%.*]] = call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]])
-; SSE-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
 ; SSE-NEXT:    [[TMP12:%.*]] = call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]])
-; SSE-NEXT:    store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @smax_v32i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
+; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
 ; AVX-NEXT:    [[TMP3:%.*]] = call <16 x i16> @llvm.smax.v16i16(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
-; AVX-NEXT:    store <16 x i16> [[TMP3]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP3]], ptr @c16, align 2
+; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; AVX-NEXT:    [[TMP6:%.*]] = call <16 x i16> @llvm.smax.v16i16(<16 x i16> [[TMP4]], <16 x i16> [[TMP5]])
-; AVX-NEXT:    store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @smax_v32i16(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @a16 to <32 x i16>*), align 2
-; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @b16 to <32 x i16>*), align 2
+; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
+; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <32 x i16> @llvm.smax.v32i16(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
-; AVX512-NEXT:    store <32 x i16> [[TMP3]], <32 x i16>* bitcast ([32 x i16]* @c16 to <32 x i16>*), align 2
+; AVX512-NEXT:    store <32 x i16> [[TMP3]], ptr @c16, align 2
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
-  %a1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
-  %a2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
-  %a3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
-  %a4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
-  %a5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
-  %a6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
-  %a7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
-  %a8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
-  %a9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
-  %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-  %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-  %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-  %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-  %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-  %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-  %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-  %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-  %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-  %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-  %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-  %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-  %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-  %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-  %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-  %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-  %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-  %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-  %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-  %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-  %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-  %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-  %b0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
-  %b1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
-  %b2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
-  %b3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
-  %b4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
-  %b5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
-  %b6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
-  %b7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
-  %b8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
-  %b9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
-  %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-  %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-  %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-  %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-  %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-  %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-  %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-  %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-  %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-  %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-  %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-  %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-  %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-  %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-  %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-  %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-  %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-  %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-  %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-  %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-  %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-  %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+  %a0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+  %a1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+  %a2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+  %a3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+  %a4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+  %a5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+  %a6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+  %a7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+  %a8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+  %a9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+  %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+  %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+  %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+  %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+  %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+  %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+  %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+  %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+  %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+  %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+  %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+  %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+  %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+  %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+  %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+  %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+  %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+  %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+  %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+  %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+  %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+  %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+  %b0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+  %b1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+  %b2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+  %b3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+  %b4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+  %b5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+  %b6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+  %b7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+  %b8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+  %b9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+  %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+  %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+  %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+  %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+  %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+  %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+  %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+  %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+  %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+  %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+  %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+  %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+  %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+  %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+  %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+  %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+  %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+  %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+  %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+  %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+  %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+  %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
   %r0  = call i16 @llvm.smax.i16(i16 %a0 , i16 %b0 )
   %r1  = call i16 @llvm.smax.i16(i16 %a1 , i16 %b1 )
   %r2  = call i16 @llvm.smax.i16(i16 %a2 , i16 %b2 )
@@ -337,207 +337,207 @@ define void @smax_v32i16() {
   %r29 = call i16 @llvm.smax.i16(i16 %a29, i16 %b29)
   %r30 = call i16 @llvm.smax.i16(i16 %a30, i16 %b30)
   %r31 = call i16 @llvm.smax.i16(i16 %a31, i16 %b31)
-  store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
-  store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
-  store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
-  store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
-  store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
-  store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
-  store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
-  store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
-  store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
-  store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
-  store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-  store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-  store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-  store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-  store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-  store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-  store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-  store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-  store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-  store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-  store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-  store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-  store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-  store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-  store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-  store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-  store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-  store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-  store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-  store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-  store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-  store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+  store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+  store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+  store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+  store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+  store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+  store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+  store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+  store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+  store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+  store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+  store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+  store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+  store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+  store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+  store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+  store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+  store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+  store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+  store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+  store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+  store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+  store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+  store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+  store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+  store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+  store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+  store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+  store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+  store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+  store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+  store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+  store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
   ret void
 }
 
 define void @smax_v64i8() {
 ; SSE-LABEL: @smax_v64i8(
-; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.smax.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-; SSE-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
 ; SSE-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.smax.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
-; SSE-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; SSE-NEXT:    [[TMP9:%.*]] = call <16 x i8> @llvm.smax.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
-; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
 ; SSE-NEXT:    [[TMP12:%.*]] = call <16 x i8> @llvm.smax.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]])
-; SSE-NEXT:    store <16 x i8> [[TMP9]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @smax_v64i8(
-; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @a8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @b8 to <32 x i8>*), align 1
+; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = call <32 x i8> @llvm.smax.v32i8(<32 x i8> [[TMP1]], <32 x i8> [[TMP2]])
-; AVX-NEXT:    store <32 x i8> [[TMP3]], <32 x i8>* bitcast ([64 x i8]* @c8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP3]], ptr @c8, align 1
+; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; AVX-NEXT:    [[TMP6:%.*]] = call <32 x i8> @llvm.smax.v32i8(<32 x i8> [[TMP4]], <32 x i8> [[TMP5]])
-; AVX-NEXT:    store <32 x i8> [[TMP6]], <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @smax_v64i8(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @a8 to <64 x i8>*), align 1
-; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @b8 to <64 x i8>*), align 1
+; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
+; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <64 x i8> @llvm.smax.v64i8(<64 x i8> [[TMP1]], <64 x i8> [[TMP2]])
-; AVX512-NEXT:    store <64 x i8> [[TMP3]], <64 x i8>* bitcast ([64 x i8]* @c8 to <64 x i8>*), align 1
+; AVX512-NEXT:    store <64 x i8> [[TMP3]], ptr @c8, align 1
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
-  %a1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
-  %a2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
-  %a3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
-  %a4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
-  %a5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
-  %a6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
-  %a7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
-  %a8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
-  %a9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
-  %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-  %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-  %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-  %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-  %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-  %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-  %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-  %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-  %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-  %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-  %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-  %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-  %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-  %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-  %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-  %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-  %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-  %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-  %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-  %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-  %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-  %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-  %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-  %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-  %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-  %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-  %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-  %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-  %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-  %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-  %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-  %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-  %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-  %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-  %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-  %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-  %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-  %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-  %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-  %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-  %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-  %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-  %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-  %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-  %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-  %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-  %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-  %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-  %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-  %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-  %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-  %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-  %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-  %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-  %b0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
-  %b1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
-  %b2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
-  %b3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
-  %b4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
-  %b5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
-  %b6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
-  %b7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
-  %b8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
-  %b9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
-  %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-  %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-  %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-  %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-  %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-  %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-  %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-  %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-  %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-  %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-  %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-  %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-  %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-  %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-  %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-  %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-  %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-  %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-  %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-  %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-  %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-  %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-  %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-  %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-  %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-  %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-  %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-  %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-  %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-  %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-  %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-  %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-  %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-  %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-  %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-  %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-  %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-  %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-  %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-  %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-  %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-  %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-  %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-  %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-  %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-  %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-  %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-  %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-  %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-  %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-  %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-  %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-  %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-  %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+  %a0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+  %a1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+  %a2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+  %a3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+  %a4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+  %a5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+  %a6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+  %a7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+  %a8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+  %a9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+  %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+  %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+  %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+  %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+  %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+  %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+  %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+  %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+  %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+  %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+  %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+  %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+  %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+  %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+  %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+  %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+  %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+  %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+  %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+  %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+  %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+  %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+  %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+  %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+  %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+  %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+  %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+  %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+  %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+  %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+  %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+  %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+  %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+  %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+  %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+  %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+  %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+  %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+  %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+  %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+  %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+  %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+  %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+  %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+  %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+  %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+  %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+  %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+  %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+  %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+  %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+  %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+  %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+  %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+  %b0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+  %b1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+  %b2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+  %b3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+  %b4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+  %b5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+  %b6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+  %b7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+  %b8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+  %b9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+  %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+  %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+  %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+  %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+  %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+  %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+  %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+  %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+  %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+  %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+  %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+  %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+  %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+  %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+  %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+  %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+  %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+  %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+  %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+  %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+  %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+  %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+  %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+  %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+  %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+  %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+  %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+  %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+  %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+  %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+  %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+  %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+  %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+  %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+  %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+  %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+  %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+  %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+  %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+  %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+  %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+  %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+  %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+  %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+  %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+  %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+  %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+  %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+  %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+  %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+  %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+  %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+  %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+  %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
   %r0  = call i8 @llvm.smax.i8(i8 %a0 , i8 %b0 )
   %r1  = call i8 @llvm.smax.i8(i8 %a1 , i8 %b1 )
   %r2  = call i8 @llvm.smax.i8(i8 %a2 , i8 %b2 )
@@ -602,69 +602,69 @@ define void @smax_v64i8() {
   %r61 = call i8 @llvm.smax.i8(i8 %a61, i8 %b61)
   %r62 = call i8 @llvm.smax.i8(i8 %a62, i8 %b62)
   %r63 = call i8 @llvm.smax.i8(i8 %a63, i8 %b63)
-  store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
-  store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
-  store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
-  store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
-  store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
-  store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
-  store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
-  store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
-  store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
-  store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
-  store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-  store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-  store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-  store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-  store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-  store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-  store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-  store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-  store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-  store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-  store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-  store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-  store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-  store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-  store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-  store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-  store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-  store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-  store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-  store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-  store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-  store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-  store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-  store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-  store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-  store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-  store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-  store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-  store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-  store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-  store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-  store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-  store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-  store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-  store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-  store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-  store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-  store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-  store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-  store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-  store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-  store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-  store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-  store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-  store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-  store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-  store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-  store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-  store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-  store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-  store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-  store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-  store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-  store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+  store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+  store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+  store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+  store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+  store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+  store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+  store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+  store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+  store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+  store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+  store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+  store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+  store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+  store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+  store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+  store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+  store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+  store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+  store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+  store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+  store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+  store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+  store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+  store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+  store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+  store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+  store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+  store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+  store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+  store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+  store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+  store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+  store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+  store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+  store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+  store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+  store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+  store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+  store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+  store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+  store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+  store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+  store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+  store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+  store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+  store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+  store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+  store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+  store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+  store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+  store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+  store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+  store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+  store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+  store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+  store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+  store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+  store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+  store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+  store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+  store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+  store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+  store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+  store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
   ret void
 }

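The conversion pattern is uniform across these tests: every typed pointer use becomes an untyped `ptr`, and the `bitcast` constant expressions that the vectorized CHECK lines needed under typed pointers disappear, since an opaque `ptr` can be loaded or stored at any type directly. A minimal before/after sketch (the global `@g` here is hypothetical, standing in for the `@a64`/`@b64`/`@c64` arrays in the hunks below):

  ; typed pointers: a bitcast reinterprets the array pointer as a vector pointer
  %v = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @g to <2 x i64>*), align 8

  ; opaque pointers: no pointee type, so the load names the value type directly
  %v = load <2 x i64>, ptr @g, align 8
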
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-smin.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-smin.ll
index dcf0c5e3e0f22..826f97f2a2d89 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-smin.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-smin.ll
@@ -27,58 +27,58 @@ declare i8  @llvm.smin.i8 (i8 , i8 )
 
 define void @smin_v8i64() {
 ; SSE-LABEL: @smin_v8i64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
+; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
 ; SSE-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
-; SSE-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP3]], ptr @c64, align 8
+; SSE-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP6:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]])
-; SSE-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; SSE-NEXT:    [[TMP9:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]])
-; SSE-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP11:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
 ; SSE-NEXT:    [[TMP12:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]])
-; SSE-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @smin_v8i64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
 ; AVX-NEXT:    [[TMP3:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]])
-; AVX-NEXT:    store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP3]], ptr @c64, align 8
+; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX-NEXT:    [[TMP6:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]])
-; AVX-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @smin_v8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
+; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.smin.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]])
-; AVX512-NEXT:    store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
+; AVX512-NEXT:    store <8 x i64> [[TMP3]], ptr @c64, align 8
 ; AVX512-NEXT:    ret void
 ;
-  %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-  %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-  %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-  %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-  %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-  %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-  %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-  %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-  %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-  %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-  %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-  %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-  %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-  %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-  %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-  %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+  %a0 = load i64, ptr @a64, align 8
+  %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+  %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+  %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+  %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+  %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+  %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+  %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+  %b0 = load i64, ptr @b64, align 8
+  %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+  %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+  %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+  %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+  %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+  %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+  %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
   %r0 = call i64 @llvm.smin.i64(i64 %a0, i64 %b0)
   %r1 = call i64 @llvm.smin.i64(i64 %a1, i64 %b1)
   %r2 = call i64 @llvm.smin.i64(i64 %a2, i64 %b2)
@@ -87,87 +87,87 @@ define void @smin_v8i64() {
   %r5 = call i64 @llvm.smin.i64(i64 %a5, i64 %b5)
   %r6 = call i64 @llvm.smin.i64(i64 %a6, i64 %b6)
   %r7 = call i64 @llvm.smin.i64(i64 %a7, i64 %b7)
-  store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-  store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-  store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-  store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-  store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-  store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-  store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-  store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+  store i64 %r0, ptr @c64, align 8
+  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @smin_v16i32() {
 ; SSE-LABEL: @smin_v16i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-; SSE-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP3]], ptr @c32, align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]])
-; SSE-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP9:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]])
-; SSE-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP12:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]])
-; SSE-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @smin_v16i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
 ; AVX-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
-; AVX-NEXT:    store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP3]], ptr @c32, align 4
+; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; AVX-NEXT:    [[TMP6:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[TMP4]], <8 x i32> [[TMP5]])
-; AVX-NEXT:    store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @smin_v16i32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @b32 to <16 x i32>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <16 x i32> @llvm.smin.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
-; AVX512-NEXT:    store <16 x i32> [[TMP3]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4
+; AVX512-NEXT:    store <16 x i32> [[TMP3]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-  %b0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
-  %b1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
-  %b2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
-  %b3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
-  %b4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
-  %b5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
-  %b6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
-  %b7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
-  %b8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
-  %b9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
-  %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-  %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-  %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-  %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-  %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-  %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+  %b0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+  %b1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+  %b2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+  %b3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+  %b4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+  %b5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+  %b6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+  %b7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+  %b8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+  %b9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+  %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+  %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+  %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+  %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+  %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+  %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
   %r0  = call i32 @llvm.smin.i32(i32 %a0 , i32 %b0 )
   %r1  = call i32 @llvm.smin.i32(i32 %a1 , i32 %b1 )
   %r2  = call i32 @llvm.smin.i32(i32 %a2 , i32 %b2 )
@@ -184,127 +184,127 @@ define void @smin_v16i32() {
   %r13 = call i32 @llvm.smin.i32(i32 %a13, i32 %b13)
   %r14 = call i32 @llvm.smin.i32(i32 %a14, i32 %b14)
   %r15 = call i32 @llvm.smin.i32(i32 %a15, i32 %b15)
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @smin_v32i16() {
 ; SSE-LABEL: @smin_v32i16(
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
 ; SSE-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.smin.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-; SSE-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
 ; SSE-NEXT:    [[TMP6:%.*]] = call <8 x i16> @llvm.smin.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]])
-; SSE-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; SSE-NEXT:    [[TMP9:%.*]] = call <8 x i16> @llvm.smin.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]])
-; SSE-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
 ; SSE-NEXT:    [[TMP12:%.*]] = call <8 x i16> @llvm.smin.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]])
-; SSE-NEXT:    store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @smin_v32i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
+; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
 ; AVX-NEXT:    [[TMP3:%.*]] = call <16 x i16> @llvm.smin.v16i16(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
-; AVX-NEXT:    store <16 x i16> [[TMP3]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP3]], ptr @c16, align 2
+; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; AVX-NEXT:    [[TMP6:%.*]] = call <16 x i16> @llvm.smin.v16i16(<16 x i16> [[TMP4]], <16 x i16> [[TMP5]])
-; AVX-NEXT:    store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @smin_v32i16(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @a16 to <32 x i16>*), align 2
-; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @b16 to <32 x i16>*), align 2
+; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
+; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <32 x i16> @llvm.smin.v32i16(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
-; AVX512-NEXT:    store <32 x i16> [[TMP3]], <32 x i16>* bitcast ([32 x i16]* @c16 to <32 x i16>*), align 2
+; AVX512-NEXT:    store <32 x i16> [[TMP3]], ptr @c16, align 2
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
-  %a1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
-  %a2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
-  %a3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
-  %a4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
-  %a5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
-  %a6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
-  %a7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
-  %a8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
-  %a9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
-  %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-  %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-  %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-  %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-  %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-  %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-  %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-  %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-  %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-  %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-  %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-  %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-  %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-  %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-  %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-  %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-  %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-  %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-  %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-  %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-  %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-  %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-  %b0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
-  %b1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
-  %b2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
-  %b3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
-  %b4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
-  %b5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
-  %b6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
-  %b7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
-  %b8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
-  %b9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
-  %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-  %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-  %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-  %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-  %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-  %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-  %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-  %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-  %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-  %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-  %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-  %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-  %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-  %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-  %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-  %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-  %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-  %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-  %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-  %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-  %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-  %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+  %a0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+  %a1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+  %a2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+  %a3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+  %a4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+  %a5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+  %a6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+  %a7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+  %a8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+  %a9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+  %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+  %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+  %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+  %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+  %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+  %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+  %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+  %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+  %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+  %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+  %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+  %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+  %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+  %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+  %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+  %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+  %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+  %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+  %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+  %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+  %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+  %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+  %b0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+  %b1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+  %b2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+  %b3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+  %b4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+  %b5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+  %b6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+  %b7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+  %b8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+  %b9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+  %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+  %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+  %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+  %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+  %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+  %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+  %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+  %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+  %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+  %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+  %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+  %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+  %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+  %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+  %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+  %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+  %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+  %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+  %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+  %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+  %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+  %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
   %r0  = call i16 @llvm.smin.i16(i16 %a0 , i16 %b0 )
   %r1  = call i16 @llvm.smin.i16(i16 %a1 , i16 %b1 )
   %r2  = call i16 @llvm.smin.i16(i16 %a2 , i16 %b2 )
@@ -337,207 +337,207 @@ define void @smin_v32i16() {
   %r29 = call i16 @llvm.smin.i16(i16 %a29, i16 %b29)
   %r30 = call i16 @llvm.smin.i16(i16 %a30, i16 %b30)
   %r31 = call i16 @llvm.smin.i16(i16 %a31, i16 %b31)
-  store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
-  store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
-  store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
-  store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
-  store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
-  store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
-  store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
-  store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
-  store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
-  store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
-  store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-  store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-  store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-  store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-  store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-  store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-  store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-  store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-  store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-  store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-  store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-  store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-  store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-  store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-  store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-  store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-  store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-  store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-  store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-  store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-  store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-  store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+  store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+  store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+  store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+  store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+  store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+  store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+  store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+  store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+  store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+  store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+  store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+  store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+  store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+  store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+  store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+  store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+  store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+  store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+  store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+  store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+  store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+  store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+  store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+  store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+  store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+  store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+  store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+  store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+  store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+  store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+  store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+  store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
   ret void
 }
 
 define void @smin_v64i8() {
 ; SSE-LABEL: @smin_v64i8(
-; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.smin.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-; SSE-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
 ; SSE-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.smin.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
-; SSE-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; SSE-NEXT:    [[TMP9:%.*]] = call <16 x i8> @llvm.smin.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
-; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
 ; SSE-NEXT:    [[TMP12:%.*]] = call <16 x i8> @llvm.smin.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]])
-; SSE-NEXT:    store <16 x i8> [[TMP9]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @smin_v64i8(
-; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @a8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @b8 to <32 x i8>*), align 1
+; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = call <32 x i8> @llvm.smin.v32i8(<32 x i8> [[TMP1]], <32 x i8> [[TMP2]])
-; AVX-NEXT:    store <32 x i8> [[TMP3]], <32 x i8>* bitcast ([64 x i8]* @c8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP3]], ptr @c8, align 1
+; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; AVX-NEXT:    [[TMP6:%.*]] = call <32 x i8> @llvm.smin.v32i8(<32 x i8> [[TMP4]], <32 x i8> [[TMP5]])
-; AVX-NEXT:    store <32 x i8> [[TMP6]], <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @smin_v64i8(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @a8 to <64 x i8>*), align 1
-; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @b8 to <64 x i8>*), align 1
+; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
+; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <64 x i8> @llvm.smin.v64i8(<64 x i8> [[TMP1]], <64 x i8> [[TMP2]])
-; AVX512-NEXT:    store <64 x i8> [[TMP3]], <64 x i8>* bitcast ([64 x i8]* @c8 to <64 x i8>*), align 1
+; AVX512-NEXT:    store <64 x i8> [[TMP3]], ptr @c8, align 1
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
-  %a1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
-  %a2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
-  %a3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
-  %a4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
-  %a5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
-  %a6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
-  %a7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
-  %a8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
-  %a9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
-  %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-  %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-  %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-  %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-  %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-  %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-  %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-  %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-  %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-  %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-  %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-  %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-  %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-  %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-  %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-  %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-  %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-  %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-  %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-  %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-  %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-  %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-  %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-  %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-  %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-  %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-  %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-  %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-  %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-  %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-  %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-  %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-  %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-  %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-  %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-  %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-  %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-  %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-  %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-  %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-  %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-  %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-  %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-  %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-  %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-  %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-  %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-  %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-  %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-  %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-  %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-  %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-  %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-  %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-  %b0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
-  %b1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
-  %b2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
-  %b3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
-  %b4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
-  %b5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
-  %b6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
-  %b7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
-  %b8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
-  %b9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
-  %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-  %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-  %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-  %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-  %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-  %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-  %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-  %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-  %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-  %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-  %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-  %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-  %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-  %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-  %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-  %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-  %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-  %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-  %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-  %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-  %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-  %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-  %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-  %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-  %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-  %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-  %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-  %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-  %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-  %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-  %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-  %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-  %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-  %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-  %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-  %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-  %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-  %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-  %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-  %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-  %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-  %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-  %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-  %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-  %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-  %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-  %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-  %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-  %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-  %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-  %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-  %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-  %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-  %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+  %a0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+  %a1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+  %a2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+  %a3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+  %a4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+  %a5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+  %a6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+  %a7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+  %a8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+  %a9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+  %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+  %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+  %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+  %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+  %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+  %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+  %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+  %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+  %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+  %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+  %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+  %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+  %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+  %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+  %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+  %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+  %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+  %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+  %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+  %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+  %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+  %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+  %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+  %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+  %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+  %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+  %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+  %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+  %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+  %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+  %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+  %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+  %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+  %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+  %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+  %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+  %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+  %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+  %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+  %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+  %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+  %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+  %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+  %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+  %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+  %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+  %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+  %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+  %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+  %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+  %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+  %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+  %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+  %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+  %b0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+  %b1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+  %b2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+  %b3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+  %b4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+  %b5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+  %b6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+  %b7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+  %b8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+  %b9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+  %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+  %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+  %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+  %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+  %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+  %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+  %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+  %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+  %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+  %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+  %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+  %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+  %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+  %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+  %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+  %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+  %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+  %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+  %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+  %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+  %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+  %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+  %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+  %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+  %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+  %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+  %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+  %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+  %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+  %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+  %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+  %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+  %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+  %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+  %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+  %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+  %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+  %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+  %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+  %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+  %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+  %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+  %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+  %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+  %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+  %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+  %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+  %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+  %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+  %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+  %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+  %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+  %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+  %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
   %r0  = call i8 @llvm.smin.i8(i8 %a0 , i8 %b0 )
   %r1  = call i8 @llvm.smin.i8(i8 %a1 , i8 %b1 )
   %r2  = call i8 @llvm.smin.i8(i8 %a2 , i8 %b2 )
@@ -602,69 +602,69 @@ define void @smin_v64i8() {
   %r61 = call i8 @llvm.smin.i8(i8 %a61, i8 %b61)
   %r62 = call i8 @llvm.smin.i8(i8 %a62, i8 %b62)
   %r63 = call i8 @llvm.smin.i8(i8 %a63, i8 %b63)
-  store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
-  store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
-  store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
-  store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
-  store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
-  store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
-  store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
-  store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
-  store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
-  store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
-  store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-  store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-  store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-  store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-  store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-  store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-  store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-  store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-  store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-  store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-  store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-  store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-  store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-  store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-  store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-  store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-  store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-  store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-  store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-  store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-  store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-  store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-  store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-  store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-  store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-  store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-  store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-  store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-  store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-  store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-  store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-  store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-  store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-  store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-  store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-  store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-  store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-  store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-  store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-  store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-  store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-  store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-  store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-  store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-  store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-  store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-  store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-  store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-  store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-  store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-  store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-  store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-  store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-  store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+  store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+  store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+  store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+  store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+  store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+  store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+  store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+  store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+  store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+  store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+  store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+  store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+  store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+  store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+  store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+  store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+  store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+  store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+  store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+  store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+  store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+  store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+  store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+  store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+  store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+  store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+  store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+  store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+  store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+  store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+  store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+  store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+  store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+  store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+  store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+  store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+  store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+  store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+  store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+  store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+  store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+  store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+  store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+  store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+  store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+  store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+  store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+  store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+  store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+  store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+  store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+  store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+  store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+  store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+  store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+  store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+  store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+  store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+  store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+  store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+  store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+  store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+  store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+  store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
   ret void
 }
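
Every hunk in this commit applies the same two mechanical rewrites, visible above in both the FileCheck lines and the raw test IR: a typed pointer type such as i16* or <16 x i8>* becomes the opaque ptr, and constant expressions that only existed to reinterpret the pointee type fold away — the bitcasts used for the vectorized loads/stores (e.g. casting [64 x i8]* to <16 x i8>*) disappear, as does any getelementptr whose indices are all zero. A minimal standalone sketch of the pattern (the global @g and function @f here are illustrative only, not taken from any of the modified tests):

  ; Typed pointers (before the conversion):
  @g = global [4 x i32] zeroinitializer
  define void @f() {
    ; vector access needed a constant-expression bitcast of the array pointer
    %v = load <2 x i32>, <2 x i32>* bitcast ([4 x i32]* @g to <2 x i32>*), align 4
    ; scalar access spelled out both the array type and the element pointer type
    %e = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @g, i32 0, i64 2), align 4
    ret void
  }

  ; Opaque pointers (after the conversion):
  @g = global [4 x i32] zeroinitializer
  define void @f() {
    ; the bitcast folds away: any ptr can be loaded at any type
    %v = load <2 x i32>, ptr @g, align 4
    ; the gep keeps its source-element type, but the pointer operands are just ptr
    %e = load i32, ptr getelementptr inbounds ([4 x i32], ptr @g, i32 0, i64 2), align 4
    ret void
  }

No information is lost in the process, since loads and stores already carry their result/value type explicitly; only the redundant pointee-type bookkeeping goes away, which is why the conversion is NFC for these tests.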

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll
index cf8d139ceb070..afaab8b8ca642 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll
@@ -27,22 +27,22 @@ declare i8  @llvm.ssub.sat.i8 (i8 , i8 )
 
 define void @sub_v8i64() {
 ; SSE-LABEL: @sub_v8i64(
-; SSE-NEXT:    [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-; SSE-NEXT:    [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-; SSE-NEXT:    [[A2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-; SSE-NEXT:    [[A3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-; SSE-NEXT:    [[A4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-; SSE-NEXT:    [[A5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-; SSE-NEXT:    [[A6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-; SSE-NEXT:    [[A7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-; SSE-NEXT:    [[B0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-; SSE-NEXT:    [[B1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-; SSE-NEXT:    [[B2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-; SSE-NEXT:    [[B3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-; SSE-NEXT:    [[B4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-; SSE-NEXT:    [[B5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-; SSE-NEXT:    [[B6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-; SSE-NEXT:    [[B7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+; SSE-NEXT:    [[A0:%.*]] = load i64, ptr @a64, align 8
+; SSE-NEXT:    [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+; SSE-NEXT:    [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+; SSE-NEXT:    [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+; SSE-NEXT:    [[B0:%.*]] = load i64, ptr @b64, align 8
+; SSE-NEXT:    [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+; SSE-NEXT:    [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+; SSE-NEXT:    [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
 ; SSE-NEXT:    [[R0:%.*]] = call i64 @llvm.ssub.sat.i64(i64 [[A0]], i64 [[B0]])
 ; SSE-NEXT:    [[R1:%.*]] = call i64 @llvm.ssub.sat.i64(i64 [[A1]], i64 [[B1]])
 ; SSE-NEXT:    [[R2:%.*]] = call i64 @llvm.ssub.sat.i64(i64 [[A2]], i64 [[B2]])
@@ -51,33 +51,33 @@ define void @sub_v8i64() {
 ; SSE-NEXT:    [[R5:%.*]] = call i64 @llvm.ssub.sat.i64(i64 [[A5]], i64 [[B5]])
 ; SSE-NEXT:    [[R6:%.*]] = call i64 @llvm.ssub.sat.i64(i64 [[A6]], i64 [[B6]])
 ; SSE-NEXT:    [[R7:%.*]] = call i64 @llvm.ssub.sat.i64(i64 [[A7]], i64 [[B7]])
-; SSE-NEXT:    store i64 [[R0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-; SSE-NEXT:    store i64 [[R1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-; SSE-NEXT:    store i64 [[R2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-; SSE-NEXT:    store i64 [[R3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-; SSE-NEXT:    store i64 [[R4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-; SSE-NEXT:    store i64 [[R5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-; SSE-NEXT:    store i64 [[R6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-; SSE-NEXT:    store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+; SSE-NEXT:    store i64 [[R0]], ptr @c64, align 8
+; SSE-NEXT:    store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+; SSE-NEXT:    store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SSE-NEXT:    store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+; SSE-NEXT:    store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SSE-NEXT:    store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+; SSE-NEXT:    store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+; SSE-NEXT:    store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @sub_v8i64(
-; SLM-NEXT:    [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-; SLM-NEXT:    [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-; SLM-NEXT:    [[A2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-; SLM-NEXT:    [[A3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-; SLM-NEXT:    [[A4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-; SLM-NEXT:    [[A5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-; SLM-NEXT:    [[A6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-; SLM-NEXT:    [[A7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-; SLM-NEXT:    [[B0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-; SLM-NEXT:    [[B1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-; SLM-NEXT:    [[B2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-; SLM-NEXT:    [[B3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-; SLM-NEXT:    [[B4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-; SLM-NEXT:    [[B5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-; SLM-NEXT:    [[B6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-; SLM-NEXT:    [[B7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+; SLM-NEXT:    [[A0:%.*]] = load i64, ptr @a64, align 8
+; SLM-NEXT:    [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+; SLM-NEXT:    [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SLM-NEXT:    [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+; SLM-NEXT:    [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SLM-NEXT:    [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+; SLM-NEXT:    [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SLM-NEXT:    [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+; SLM-NEXT:    [[B0:%.*]] = load i64, ptr @b64, align 8
+; SLM-NEXT:    [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+; SLM-NEXT:    [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+; SLM-NEXT:    [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+; SLM-NEXT:    [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+; SLM-NEXT:    [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+; SLM-NEXT:    [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+; SLM-NEXT:    [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
 ; SLM-NEXT:    [[R0:%.*]] = call i64 @llvm.ssub.sat.i64(i64 [[A0]], i64 [[B0]])
 ; SLM-NEXT:    [[R1:%.*]] = call i64 @llvm.ssub.sat.i64(i64 [[A1]], i64 [[B1]])
 ; SLM-NEXT:    [[R2:%.*]] = call i64 @llvm.ssub.sat.i64(i64 [[A2]], i64 [[B2]])
@@ -86,79 +86,79 @@ define void @sub_v8i64() {
 ; SLM-NEXT:    [[R5:%.*]] = call i64 @llvm.ssub.sat.i64(i64 [[A5]], i64 [[B5]])
 ; SLM-NEXT:    [[R6:%.*]] = call i64 @llvm.ssub.sat.i64(i64 [[A6]], i64 [[B6]])
 ; SLM-NEXT:    [[R7:%.*]] = call i64 @llvm.ssub.sat.i64(i64 [[A7]], i64 [[B7]])
-; SLM-NEXT:    store i64 [[R0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-; SLM-NEXT:    store i64 [[R1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-; SLM-NEXT:    store i64 [[R2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-; SLM-NEXT:    store i64 [[R3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-; SLM-NEXT:    store i64 [[R4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-; SLM-NEXT:    store i64 [[R5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-; SLM-NEXT:    store i64 [[R6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-; SLM-NEXT:    store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+; SLM-NEXT:    store i64 [[R0]], ptr @c64, align 8
+; SLM-NEXT:    store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+; SLM-NEXT:    store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SLM-NEXT:    store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+; SLM-NEXT:    store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SLM-NEXT:    store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+; SLM-NEXT:    store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+; SLM-NEXT:    store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
 ; SLM-NEXT:    ret void
 ;
 ; AVX1-LABEL: @sub_v8i64(
-; AVX1-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
+; AVX1-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
+; AVX1-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
 ; AVX1-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
-; AVX1-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
+; AVX1-NEXT:    store <2 x i64> [[TMP3]], ptr @c64, align 8
+; AVX1-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; AVX1-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
 ; AVX1-NEXT:    [[TMP6:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]])
-; AVX1-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
+; AVX1-NEXT:    store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; AVX1-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX1-NEXT:    [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX1-NEXT:    [[TMP9:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]])
-; AVX1-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
-; AVX1-NEXT:    [[TMP11:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
+; AVX1-NEXT:    store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; AVX1-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; AVX1-NEXT:    [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
 ; AVX1-NEXT:    [[TMP12:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]])
-; AVX1-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
+; AVX1-NEXT:    store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @sub_v8i64(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
-; AVX2-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; AVX2-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
 ; AVX2-NEXT:    [[TMP3:%.*]] = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]])
-; AVX2-NEXT:    store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
-; AVX2-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
-; AVX2-NEXT:    [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX2-NEXT:    store <4 x i64> [[TMP3]], ptr @c64, align 8
+; AVX2-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX2-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX2-NEXT:    [[TMP6:%.*]] = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]])
-; AVX2-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX2-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sub_v8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
+; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]])
-; AVX512-NEXT:    store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
+; AVX512-NEXT:    store <8 x i64> [[TMP3]], ptr @c64, align 8
 ; AVX512-NEXT:    ret void
 ;
 ; AVX256BW-LABEL: @sub_v8i64(
-; AVX256BW-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
-; AVX256BW-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
-; AVX256BW-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
-; AVX256BW-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX256BW-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; AVX256BW-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX256BW-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr @b64, align 8
+; AVX256BW-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX256BW-NEXT:    [[TMP5:%.*]] = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP3]])
 ; AVX256BW-NEXT:    [[TMP6:%.*]] = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> [[TMP2]], <4 x i64> [[TMP4]])
-; AVX256BW-NEXT:    store <4 x i64> [[TMP5]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
-; AVX256BW-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX256BW-NEXT:    store <4 x i64> [[TMP5]], ptr @c64, align 8
+; AVX256BW-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
 ; AVX256BW-NEXT:    ret void
-  %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-  %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-  %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-  %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-  %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-  %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-  %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-  %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-  %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-  %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-  %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-  %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-  %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-  %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-  %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-  %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+  %a0 = load i64, ptr @a64, align 8
+  %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+  %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+  %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+  %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+  %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+  %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+  %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+  %b0 = load i64, ptr @b64, align 8
+  %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+  %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+  %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+  %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+  %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+  %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+  %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
   %r0 = call i64 @llvm.ssub.sat.i64(i64 %a0, i64 %b0)
   %r1 = call i64 @llvm.ssub.sat.i64(i64 %a1, i64 %b1)
   %r2 = call i64 @llvm.ssub.sat.i64(i64 %a2, i64 %b2)
@@ -167,106 +167,106 @@ define void @sub_v8i64() {
   %r5 = call i64 @llvm.ssub.sat.i64(i64 %a5, i64 %b5)
   %r6 = call i64 @llvm.ssub.sat.i64(i64 %a6, i64 %b6)
   %r7 = call i64 @llvm.ssub.sat.i64(i64 %a7, i64 %b7)
-  store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-  store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-  store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-  store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-  store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-  store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-  store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-  store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+  store i64 %r0, ptr @c64, align 8
+  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
   ret void
 }
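
For orientation, the mechanical rewrite in every hunk above is the same: with opaque pointers the access type is carried by the load/store instruction itself, so the constant-expression bitcasts that adapted a global's array type to the vector access type fold away entirely. A minimal before/after sketch of the pattern, illustrative only and not itself part of this diff:

  ; typed pointers: the access type is encoded in the pointer operand
  %v = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
  ; opaque pointers: the access type comes from the instruction; no bitcast needed
  %v = load <2 x i64>, ptr @a64, align 8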
 
 define void @sub_v16i32() {
 ; SSE-LABEL: @sub_v16i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-; SSE-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP3]], ptr @c32, align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]])
-; SSE-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP9:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]])
-; SSE-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP12:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]])
-; SSE-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @sub_v16i32(
-; SLM-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
+; SLM-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
+; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
 ; SLM-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-; SLM-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP3]], ptr @c32, align 4
+; SLM-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SLM-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
 ; SLM-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]])
-; SLM-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SLM-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SLM-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; SLM-NEXT:    [[TMP9:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]])
-; SLM-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SLM-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SLM-NEXT:    [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
 ; SLM-NEXT:    [[TMP12:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]])
-; SLM-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @sub_v16i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
 ; AVX-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
-; AVX-NEXT:    store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP3]], ptr @c32, align 4
+; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; AVX-NEXT:    [[TMP6:%.*]] = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> [[TMP4]], <8 x i32> [[TMP5]])
-; AVX-NEXT:    store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sub_v16i32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @b32 to <16 x i32>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
-; AVX512-NEXT:    store <16 x i32> [[TMP3]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4
+; AVX512-NEXT:    store <16 x i32> [[TMP3]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-  %b0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
-  %b1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
-  %b2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
-  %b3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
-  %b4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
-  %b5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
-  %b6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
-  %b7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
-  %b8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
-  %b9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
-  %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-  %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-  %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-  %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-  %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-  %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+  %b0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+  %b1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+  %b2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+  %b3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+  %b4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+  %b5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+  %b6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+  %b7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+  %b8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+  %b9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+  %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+  %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+  %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+  %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+  %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+  %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
   %r0  = call i32 @llvm.ssub.sat.i32(i32 %a0 , i32 %b0 )
   %r1  = call i32 @llvm.ssub.sat.i32(i32 %a1 , i32 %b1 )
   %r2  = call i32 @llvm.ssub.sat.i32(i32 %a2 , i32 %b2 )
@@ -283,146 +283,146 @@ define void @sub_v16i32() {
   %r13 = call i32 @llvm.ssub.sat.i32(i32 %a13, i32 %b13)
   %r14 = call i32 @llvm.ssub.sat.i32(i32 %a14, i32 %b14)
   %r15 = call i32 @llvm.ssub.sat.i32(i32 %a15, i32 %b15)
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
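
Each check prefix (SSE, SLM, AVX, AVX512, and so on) corresponds to one RUN configuration of the test; the RUN lines sit outside this excerpt. The vector width the SLP vectorizer settles on tracks the target's register width, which is why the 16 x i32 body above becomes four v4i32 calls for SSE/SLM, two v8i32 calls for AVX, and a single v16i32 call for AVX512. The check lines rely on the standard FileCheck capture idiom, sketched here on one line taken from above:

  ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
  ; [[TMP1:%.*]] binds whatever SSA name the pass actually emits,
  ; so later check lines can match the same value again as [[TMP1]]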
 
 define void @sub_v32i16() {
 ; SSE-LABEL: @sub_v32i16(
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
 ; SSE-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-; SSE-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
 ; SSE-NEXT:    [[TMP6:%.*]] = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]])
-; SSE-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; SSE-NEXT:    [[TMP9:%.*]] = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]])
-; SSE-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
 ; SSE-NEXT:    [[TMP12:%.*]] = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]])
-; SSE-NEXT:    store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @sub_v32i16(
-; SLM-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SLM-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
 ; SLM-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-; SLM-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SLM-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SLM-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
 ; SLM-NEXT:    [[TMP6:%.*]] = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]])
-; SLM-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SLM-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SLM-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; SLM-NEXT:    [[TMP9:%.*]] = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]])
-; SLM-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SLM-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SLM-NEXT:    [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
 ; SLM-NEXT:    [[TMP12:%.*]] = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]])
-; SLM-NEXT:    store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @sub_v32i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
+; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
 ; AVX-NEXT:    [[TMP3:%.*]] = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
-; AVX-NEXT:    store <16 x i16> [[TMP3]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP3]], ptr @c16, align 2
+; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; AVX-NEXT:    [[TMP6:%.*]] = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> [[TMP4]], <16 x i16> [[TMP5]])
-; AVX-NEXT:    store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sub_v32i16(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @a16 to <32 x i16>*), align 2
-; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @b16 to <32 x i16>*), align 2
+; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
+; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
-; AVX512-NEXT:    store <32 x i16> [[TMP3]], <32 x i16>* bitcast ([32 x i16]* @c16 to <32 x i16>*), align 2
+; AVX512-NEXT:    store <32 x i16> [[TMP3]], ptr @c16, align 2
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
-  %a1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
-  %a2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
-  %a3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
-  %a4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
-  %a5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
-  %a6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
-  %a7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
-  %a8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
-  %a9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
-  %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-  %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-  %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-  %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-  %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-  %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-  %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-  %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-  %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-  %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-  %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-  %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-  %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-  %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-  %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-  %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-  %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-  %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-  %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-  %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-  %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-  %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-  %b0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
-  %b1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
-  %b2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
-  %b3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
-  %b4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
-  %b5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
-  %b6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
-  %b7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
-  %b8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
-  %b9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
-  %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-  %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-  %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-  %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-  %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-  %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-  %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-  %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-  %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-  %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-  %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-  %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-  %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-  %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-  %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-  %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-  %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-  %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-  %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-  %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-  %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-  %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+  %a0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+  %a1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+  %a2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+  %a3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+  %a4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+  %a5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+  %a6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+  %a7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+  %a8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+  %a9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+  %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+  %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+  %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+  %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+  %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+  %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+  %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+  %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+  %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+  %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+  %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+  %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+  %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+  %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+  %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+  %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+  %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+  %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+  %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+  %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+  %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+  %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+  %b0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+  %b1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+  %b2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+  %b3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+  %b4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+  %b5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+  %b6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+  %b7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+  %b8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+  %b9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+  %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+  %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+  %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+  %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+  %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+  %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+  %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+  %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+  %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+  %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+  %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+  %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+  %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+  %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+  %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+  %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+  %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+  %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+  %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+  %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+  %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+  %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
   %r0  = call i16 @llvm.ssub.sat.i16(i16 %a0 , i16 %b0 )
   %r1  = call i16 @llvm.ssub.sat.i16(i16 %a1 , i16 %b1 )
   %r2  = call i16 @llvm.ssub.sat.i16(i16 %a2 , i16 %b2 )
@@ -455,226 +455,226 @@ define void @sub_v32i16() {
   %r29 = call i16 @llvm.ssub.sat.i16(i16 %a29, i16 %b29)
   %r30 = call i16 @llvm.ssub.sat.i16(i16 %a30, i16 %b30)
   %r31 = call i16 @llvm.ssub.sat.i16(i16 %a31, i16 %b31)
-  store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
-  store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
-  store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
-  store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
-  store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
-  store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
-  store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
-  store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
-  store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
-  store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
-  store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-  store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-  store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-  store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-  store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-  store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-  store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-  store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-  store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-  store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-  store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-  store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-  store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-  store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-  store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-  store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-  store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-  store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-  store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-  store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-  store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-  store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+  store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+  store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+  store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+  store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+  store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+  store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+  store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+  store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+  store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+  store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+  store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+  store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+  store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+  store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+  store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+  store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+  store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+  store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+  store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+  store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+  store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+  store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+  store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+  store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+  store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+  store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+  store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+  store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+  store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+  store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+  store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+  store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
   ret void
 }
 
 define void @sub_v64i8() {
 ; SSE-LABEL: @sub_v64i8(
-; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-; SSE-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
 ; SSE-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
-; SSE-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; SSE-NEXT:    [[TMP9:%.*]] = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
-; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
 ; SSE-NEXT:    [[TMP12:%.*]] = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]])
-; SSE-NEXT:    store <16 x i8> [[TMP9]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @sub_v64i8(
-; SLM-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
+; SLM-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-; SLM-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SLM-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SLM-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
 ; SLM-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
-; SLM-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SLM-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SLM-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; SLM-NEXT:    [[TMP9:%.*]] = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
-; SLM-NEXT:    [[TMP10:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
+; SLM-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SLM-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
 ; SLM-NEXT:    [[TMP12:%.*]] = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]])
-; SLM-NEXT:    store <16 x i8> [[TMP9]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SLM-NEXT:    store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @sub_v64i8(
-; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @a8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @b8 to <32 x i8>*), align 1
+; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> [[TMP1]], <32 x i8> [[TMP2]])
-; AVX-NEXT:    store <32 x i8> [[TMP3]], <32 x i8>* bitcast ([64 x i8]* @c8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP3]], ptr @c8, align 1
+; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; AVX-NEXT:    [[TMP6:%.*]] = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> [[TMP4]], <32 x i8> [[TMP5]])
-; AVX-NEXT:    store <32 x i8> [[TMP6]], <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sub_v64i8(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @a8 to <64 x i8>*), align 1
-; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @b8 to <64 x i8>*), align 1
+; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
+; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> [[TMP1]], <64 x i8> [[TMP2]])
-; AVX512-NEXT:    store <64 x i8> [[TMP3]], <64 x i8>* bitcast ([64 x i8]* @c8 to <64 x i8>*), align 1
+; AVX512-NEXT:    store <64 x i8> [[TMP3]], ptr @c8, align 1
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
-  %a1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
-  %a2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
-  %a3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
-  %a4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
-  %a5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
-  %a6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
-  %a7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
-  %a8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
-  %a9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
-  %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-  %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-  %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-  %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-  %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-  %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-  %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-  %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-  %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-  %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-  %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-  %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-  %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-  %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-  %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-  %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-  %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-  %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-  %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-  %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-  %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-  %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-  %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-  %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-  %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-  %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-  %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-  %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-  %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-  %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-  %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-  %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-  %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-  %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-  %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-  %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-  %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-  %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-  %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-  %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-  %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-  %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-  %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-  %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-  %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-  %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-  %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-  %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-  %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-  %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-  %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-  %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-  %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-  %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-  %b0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
-  %b1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
-  %b2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
-  %b3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
-  %b4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
-  %b5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
-  %b6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
-  %b7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
-  %b8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
-  %b9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
-  %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-  %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-  %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-  %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-  %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-  %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-  %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-  %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-  %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-  %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-  %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-  %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-  %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-  %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-  %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-  %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-  %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-  %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-  %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-  %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-  %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-  %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-  %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-  %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-  %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-  %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-  %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-  %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-  %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-  %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-  %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-  %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-  %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-  %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-  %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-  %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-  %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-  %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-  %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-  %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-  %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-  %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-  %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-  %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-  %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-  %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-  %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-  %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-  %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-  %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-  %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-  %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-  %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-  %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+  %a0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+  %a1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+  %a2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+  %a3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+  %a4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+  %a5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+  %a6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+  %a7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+  %a8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+  %a9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+  %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+  %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+  %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+  %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+  %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+  %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+  %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+  %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+  %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+  %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+  %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+  %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+  %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+  %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+  %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+  %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+  %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+  %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+  %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+  %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+  %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+  %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+  %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+  %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+  %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+  %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+  %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+  %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+  %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+  %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+  %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+  %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+  %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+  %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+  %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+  %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+  %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+  %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+  %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+  %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+  %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+  %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+  %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+  %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+  %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+  %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+  %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+  %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+  %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+  %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+  %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+  %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+  %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+  %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+  %b0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+  %b1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+  %b2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+  %b3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+  %b4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+  %b5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+  %b6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+  %b7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+  %b8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+  %b9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+  %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+  %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+  %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+  %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+  %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+  %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+  %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+  %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+  %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+  %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+  %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+  %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+  %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+  %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+  %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+  %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+  %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+  %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+  %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+  %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+  %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+  %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+  %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+  %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+  %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+  %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+  %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+  %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+  %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+  %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+  %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+  %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+  %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+  %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+  %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+  %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+  %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+  %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+  %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+  %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+  %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+  %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+  %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+  %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+  %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+  %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+  %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+  %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+  %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+  %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+  %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+  %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+  %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+  %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
   %r0  = call i8 @llvm.ssub.sat.i8(i8 %a0 , i8 %b0 )
   %r1  = call i8 @llvm.ssub.sat.i8(i8 %a1 , i8 %b1 )
   %r2  = call i8 @llvm.ssub.sat.i8(i8 %a2 , i8 %b2 )
@@ -739,69 +739,69 @@ define void @sub_v64i8() {
   %r61 = call i8 @llvm.ssub.sat.i8(i8 %a61, i8 %b61)
   %r62 = call i8 @llvm.ssub.sat.i8(i8 %a62, i8 %b62)
   %r63 = call i8 @llvm.ssub.sat.i8(i8 %a63, i8 %b63)
-  store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
-  store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
-  store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
-  store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
-  store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
-  store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
-  store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
-  store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
-  store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
-  store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
-  store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-  store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-  store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-  store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-  store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-  store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-  store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-  store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-  store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-  store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-  store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-  store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-  store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-  store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-  store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-  store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-  store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-  store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-  store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-  store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-  store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-  store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-  store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-  store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-  store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-  store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-  store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-  store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-  store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-  store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-  store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-  store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-  store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-  store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-  store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-  store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-  store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-  store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-  store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-  store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-  store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-  store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-  store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-  store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-  store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-  store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-  store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-  store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-  store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-  store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-  store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-  store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-  store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-  store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+  store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+  store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+  store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+  store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+  store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+  store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+  store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+  store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+  store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+  store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+  store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+  store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+  store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+  store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+  store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+  store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+  store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+  store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+  store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+  store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+  store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+  store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+  store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+  store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+  store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+  store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+  store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+  store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+  store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+  store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+  store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+  store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+  store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+  store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+  store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+  store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+  store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+  store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+  store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+  store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+  store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+  store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+  store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+  store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+  store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+  store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+  store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+  store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+  store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+  store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+  store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+  store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+  store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+  store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+  store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+  store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+  store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+  store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+  store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+  store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+  store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+  store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+  store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+  store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
   ret void
 }

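The rewrite applied throughout these hunks is purely mechanical: explicitly typed pointer operands become plain `ptr`, and a load or store through a constant-expression bitcast of a global collapses to a direct access of the global, since opaque pointers carry no pointee type for a bitcast to change. A minimal before/after sketch (hypothetical global `@g`, not taken from this diff):

  ; typed pointers (before): the vector access needs a bitcast constant expression
  %v = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @g to <4 x i32>*), align 4
  ; opaque pointers (after): the bitcast is redundant and the load uses @g directly
  %v = load <4 x i32>, ptr @g, align 4

GEP constant expressions with a nonzero offset are kept, only rewritten to use `ptr`; in some hunks a zero-offset GEP (indices 0, 0) additionally folds to a direct `ptr @global` access, as in the `@a64` and `@b64` loads below.
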
diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssubo.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssubo.ll
index 850fe9fd85936..d628dddd16cb1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssubo.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssubo.ll
@@ -27,22 +27,22 @@ declare {i8 , i1} @llvm.ssub.with.overflow.i8 (i8 , i8 )
 
 define void @sub_v8i64() {
 ; CHECK-LABEL: @sub_v8i64(
-; CHECK-NEXT:    [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-; CHECK-NEXT:    [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-; CHECK-NEXT:    [[A2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-; CHECK-NEXT:    [[A3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-; CHECK-NEXT:    [[A4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-; CHECK-NEXT:    [[A5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-; CHECK-NEXT:    [[A6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-; CHECK-NEXT:    [[A7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-; CHECK-NEXT:    [[B0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-; CHECK-NEXT:    [[B1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-; CHECK-NEXT:    [[B2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-; CHECK-NEXT:    [[B3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-; CHECK-NEXT:    [[B4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-; CHECK-NEXT:    [[B5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-; CHECK-NEXT:    [[B6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-; CHECK-NEXT:    [[B7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+; CHECK-NEXT:    [[A0:%.*]] = load i64, ptr @a64, align 8
+; CHECK-NEXT:    [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+; CHECK-NEXT:    [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; CHECK-NEXT:    [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+; CHECK-NEXT:    [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; CHECK-NEXT:    [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+; CHECK-NEXT:    [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; CHECK-NEXT:    [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+; CHECK-NEXT:    [[B0:%.*]] = load i64, ptr @b64, align 8
+; CHECK-NEXT:    [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+; CHECK-NEXT:    [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+; CHECK-NEXT:    [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+; CHECK-NEXT:    [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+; CHECK-NEXT:    [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+; CHECK-NEXT:    [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+; CHECK-NEXT:    [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
 ; CHECK-NEXT:    [[C0:%.*]] = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 [[A0]], i64 [[B0]])
 ; CHECK-NEXT:    [[C1:%.*]] = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 [[A1]], i64 [[B1]])
 ; CHECK-NEXT:    [[C2:%.*]] = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 [[A2]], i64 [[B2]])
@@ -59,32 +59,32 @@ define void @sub_v8i64() {
 ; CHECK-NEXT:    [[R5:%.*]] = extractvalue { i64, i1 } [[C5]], 0
 ; CHECK-NEXT:    [[R6:%.*]] = extractvalue { i64, i1 } [[C6]], 0
 ; CHECK-NEXT:    [[R7:%.*]] = extractvalue { i64, i1 } [[C7]], 0
-; CHECK-NEXT:    store i64 [[R0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-; CHECK-NEXT:    store i64 [[R1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-; CHECK-NEXT:    store i64 [[R2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-; CHECK-NEXT:    store i64 [[R3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-; CHECK-NEXT:    store i64 [[R4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-; CHECK-NEXT:    store i64 [[R5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-; CHECK-NEXT:    store i64 [[R6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-; CHECK-NEXT:    store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+; CHECK-NEXT:    store i64 [[R0]], ptr @c64, align 8
+; CHECK-NEXT:    store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+; CHECK-NEXT:    store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; CHECK-NEXT:    store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+; CHECK-NEXT:    store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; CHECK-NEXT:    store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+; CHECK-NEXT:    store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+; CHECK-NEXT:    store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-  %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-  %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-  %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-  %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-  %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-  %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-  %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-  %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-  %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-  %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-  %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-  %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-  %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-  %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-  %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+  %a0 = load i64, ptr @a64, align 8
+  %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+  %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+  %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+  %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+  %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+  %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+  %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+  %b0 = load i64, ptr @b64, align 8
+  %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+  %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+  %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+  %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+  %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+  %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+  %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
   %c0 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a0, i64 %b0)
   %c1 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a1, i64 %b1)
   %c2 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a2, i64 %b2)
@@ -101,51 +101,51 @@ define void @sub_v8i64() {
   %r5 = extractvalue {i64, i1} %c5, 0
   %r6 = extractvalue {i64, i1} %c6, 0
   %r7 = extractvalue {i64, i1} %c7, 0
-  store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-  store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-  store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-  store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-  store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-  store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-  store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-  store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+  store i64 %r0, ptr @c64, align 8
+  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
   ret void
 }
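
The rewrite above repeats mechanically throughout these tests: every typed
access drops its pointee type, and a constant GEP of element 0 generally
folds to the bare global (compare %a0 at the top of this hunk). A minimal
self-contained sketch of the rewritten, opaque-pointer form (@g and
@opaque_ptr_sketch are illustrative names, not taken from these tests):

  @g = global [2 x i32] zeroinitializer

  define i32 @opaque_ptr_sketch() {
    ; Element 0: the "i32 0, i64 0" GEP folds away to the plain global.
    %v0 = load i32, ptr @g, align 4
    ; Element 1: the same constant GEP, now written with untyped ptr operands.
    %v1 = load i32, ptr getelementptr inbounds ([2 x i32], ptr @g, i32 0, i64 1), align 4
    %r = add i32 %v0, %v1
    ret i32 %r
  }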
 
 define void @sub_v16i32() {
 ; CHECK-LABEL: @sub_v16i32(
-; CHECK-NEXT:    [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4
-; CHECK-NEXT:    [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4
-; CHECK-NEXT:    [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4
-; CHECK-NEXT:    [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4
-; CHECK-NEXT:    [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4
-; CHECK-NEXT:    [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4
-; CHECK-NEXT:    [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4
-; CHECK-NEXT:    [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4
-; CHECK-NEXT:    [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4
-; CHECK-NEXT:    [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4
-; CHECK-NEXT:    [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-; CHECK-NEXT:    [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-; CHECK-NEXT:    [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-; CHECK-NEXT:    [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-; CHECK-NEXT:    [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-; CHECK-NEXT:    [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-; CHECK-NEXT:    [[B0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0), align 4
-; CHECK-NEXT:    [[B1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1), align 4
-; CHECK-NEXT:    [[B2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2), align 4
-; CHECK-NEXT:    [[B3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3), align 4
-; CHECK-NEXT:    [[B4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4), align 4
-; CHECK-NEXT:    [[B5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5), align 4
-; CHECK-NEXT:    [[B6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6), align 4
-; CHECK-NEXT:    [[B7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7), align 4
-; CHECK-NEXT:    [[B8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8), align 4
-; CHECK-NEXT:    [[B9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9), align 4
-; CHECK-NEXT:    [[B10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-; CHECK-NEXT:    [[B11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-; CHECK-NEXT:    [[B12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-; CHECK-NEXT:    [[B13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-; CHECK-NEXT:    [[B14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-; CHECK-NEXT:    [[B15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+; CHECK-NEXT:    [[A0:%.*]] = load i32, ptr @a32, align 4
+; CHECK-NEXT:    [[A1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[A2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[A3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3), align 4
+; CHECK-NEXT:    [[A4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; CHECK-NEXT:    [[A5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5), align 4
+; CHECK-NEXT:    [[A6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6), align 4
+; CHECK-NEXT:    [[A7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7), align 4
+; CHECK-NEXT:    [[A8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; CHECK-NEXT:    [[A9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9), align 4
+; CHECK-NEXT:    [[A10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+; CHECK-NEXT:    [[A11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+; CHECK-NEXT:    [[A12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; CHECK-NEXT:    [[A13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+; CHECK-NEXT:    [[A14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+; CHECK-NEXT:    [[A15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+; CHECK-NEXT:    [[B0:%.*]] = load i32, ptr @b32, align 4
+; CHECK-NEXT:    [[B1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[B2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[B3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3), align 4
+; CHECK-NEXT:    [[B4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
+; CHECK-NEXT:    [[B5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5), align 4
+; CHECK-NEXT:    [[B6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6), align 4
+; CHECK-NEXT:    [[B7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7), align 4
+; CHECK-NEXT:    [[B8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
+; CHECK-NEXT:    [[B9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9), align 4
+; CHECK-NEXT:    [[B10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+; CHECK-NEXT:    [[B11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+; CHECK-NEXT:    [[B12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+; CHECK-NEXT:    [[B13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+; CHECK-NEXT:    [[B14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+; CHECK-NEXT:    [[B15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
 ; CHECK-NEXT:    [[C0:%.*]] = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[A0]], i32 [[B0]])
 ; CHECK-NEXT:    [[C1:%.*]] = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[A1]], i32 [[B1]])
 ; CHECK-NEXT:    [[C2:%.*]] = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[A2]], i32 [[B2]])
@@ -178,56 +178,56 @@ define void @sub_v16i32() {
 ; CHECK-NEXT:    [[R13:%.*]] = extractvalue { i32, i1 } [[C13]], 0
 ; CHECK-NEXT:    [[R14:%.*]] = extractvalue { i32, i1 } [[C14]], 0
 ; CHECK-NEXT:    [[R15:%.*]] = extractvalue { i32, i1 } [[C15]], 0
-; CHECK-NEXT:    store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4
-; CHECK-NEXT:    store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4
-; CHECK-NEXT:    store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4
-; CHECK-NEXT:    store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4
-; CHECK-NEXT:    store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4
-; CHECK-NEXT:    store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4
-; CHECK-NEXT:    store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4
-; CHECK-NEXT:    store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4
-; CHECK-NEXT:    store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4
-; CHECK-NEXT:    store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4
-; CHECK-NEXT:    store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-; CHECK-NEXT:    store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-; CHECK-NEXT:    store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-; CHECK-NEXT:    store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-; CHECK-NEXT:    store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-; CHECK-NEXT:    store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+; CHECK-NEXT:    store i32 [[R0]], ptr @c32, align 4
+; CHECK-NEXT:    store i32 [[R1]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1), align 4
+; CHECK-NEXT:    store i32 [[R2]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2), align 4
+; CHECK-NEXT:    store i32 [[R3]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3), align 4
+; CHECK-NEXT:    store i32 [[R4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; CHECK-NEXT:    store i32 [[R5]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5), align 4
+; CHECK-NEXT:    store i32 [[R6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6), align 4
+; CHECK-NEXT:    store i32 [[R7]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7), align 4
+; CHECK-NEXT:    store i32 [[R8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; CHECK-NEXT:    store i32 [[R9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9), align 4
+; CHECK-NEXT:    store i32 [[R10]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+; CHECK-NEXT:    store i32 [[R11]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+; CHECK-NEXT:    store i32 [[R12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+; CHECK-NEXT:    store i32 [[R13]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+; CHECK-NEXT:    store i32 [[R14]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+; CHECK-NEXT:    store i32 [[R15]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
 ; CHECK-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-  %b0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
-  %b1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
-  %b2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
-  %b3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
-  %b4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
-  %b5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
-  %b6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
-  %b7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
-  %b8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
-  %b9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
-  %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-  %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-  %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-  %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-  %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-  %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+  %b0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+  %b1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+  %b2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+  %b3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+  %b4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+  %b5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+  %b6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+  %b7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+  %b8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+  %b9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+  %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+  %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+  %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+  %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+  %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+  %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
   %c0  = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a0 , i32 %b0 )
   %c1  = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a1 , i32 %b1 )
   %c2  = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a2 , i32 %b2 )
@@ -260,91 +260,91 @@ define void @sub_v16i32() {
   %r13 = extractvalue {i32, i1} %c13, 0
   %r14 = extractvalue {i32, i1} %c14, 0
   %r15 = extractvalue {i32, i1} %c15, 0
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
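
Each lane in these test bodies calls the checked-subtract intrinsic and keeps
only field 0 of the {value, overflow-bit} pair, i.e. the wrapped difference.
A one-lane sketch (the function name is illustrative):

  declare { i16, i1 } @llvm.ssub.with.overflow.i16(i16, i16)

  define i16 @ssubo_lane(i16 %a, i16 %b) {
    %c = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 %a, i16 %b)
    ; Field 0 is a - b with wrapping; field 1 (ignored here) flags overflow.
    %r = extractvalue { i16, i1 } %c, 0
    ret i16 %r
  }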
 
 define void @sub_v32i16() {
 ; CHECK-LABEL: @sub_v32i16(
-; CHECK-NEXT:    [[A0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0), align 2
-; CHECK-NEXT:    [[A1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1), align 2
-; CHECK-NEXT:    [[A2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2), align 2
-; CHECK-NEXT:    [[A3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3), align 2
-; CHECK-NEXT:    [[A4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4), align 2
-; CHECK-NEXT:    [[A5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5), align 2
-; CHECK-NEXT:    [[A6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6), align 2
-; CHECK-NEXT:    [[A7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7), align 2
-; CHECK-NEXT:    [[A8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8), align 2
-; CHECK-NEXT:    [[A9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9), align 2
-; CHECK-NEXT:    [[A10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-; CHECK-NEXT:    [[A11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-; CHECK-NEXT:    [[A12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-; CHECK-NEXT:    [[A13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-; CHECK-NEXT:    [[A14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-; CHECK-NEXT:    [[A15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-; CHECK-NEXT:    [[A16:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-; CHECK-NEXT:    [[A17:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-; CHECK-NEXT:    [[A18:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-; CHECK-NEXT:    [[A19:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-; CHECK-NEXT:    [[A20:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-; CHECK-NEXT:    [[A21:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-; CHECK-NEXT:    [[A22:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-; CHECK-NEXT:    [[A23:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-; CHECK-NEXT:    [[A24:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-; CHECK-NEXT:    [[A25:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-; CHECK-NEXT:    [[A26:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-; CHECK-NEXT:    [[A27:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-; CHECK-NEXT:    [[A28:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-; CHECK-NEXT:    [[A29:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-; CHECK-NEXT:    [[A30:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-; CHECK-NEXT:    [[A31:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-; CHECK-NEXT:    [[B0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0), align 2
-; CHECK-NEXT:    [[B1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1), align 2
-; CHECK-NEXT:    [[B2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2), align 2
-; CHECK-NEXT:    [[B3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3), align 2
-; CHECK-NEXT:    [[B4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4), align 2
-; CHECK-NEXT:    [[B5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5), align 2
-; CHECK-NEXT:    [[B6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6), align 2
-; CHECK-NEXT:    [[B7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7), align 2
-; CHECK-NEXT:    [[B8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8), align 2
-; CHECK-NEXT:    [[B9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9), align 2
-; CHECK-NEXT:    [[B10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-; CHECK-NEXT:    [[B11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-; CHECK-NEXT:    [[B12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-; CHECK-NEXT:    [[B13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-; CHECK-NEXT:    [[B14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-; CHECK-NEXT:    [[B15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-; CHECK-NEXT:    [[B16:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-; CHECK-NEXT:    [[B17:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-; CHECK-NEXT:    [[B18:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-; CHECK-NEXT:    [[B19:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-; CHECK-NEXT:    [[B20:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-; CHECK-NEXT:    [[B21:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-; CHECK-NEXT:    [[B22:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-; CHECK-NEXT:    [[B23:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-; CHECK-NEXT:    [[B24:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-; CHECK-NEXT:    [[B25:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-; CHECK-NEXT:    [[B26:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-; CHECK-NEXT:    [[B27:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-; CHECK-NEXT:    [[B28:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-; CHECK-NEXT:    [[B29:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-; CHECK-NEXT:    [[B30:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-; CHECK-NEXT:    [[B31:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+; CHECK-NEXT:    [[A0:%.*]] = load i16, ptr @a16, align 2
+; CHECK-NEXT:    [[A1:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1), align 2
+; CHECK-NEXT:    [[A2:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2), align 2
+; CHECK-NEXT:    [[A3:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3), align 2
+; CHECK-NEXT:    [[A4:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4), align 2
+; CHECK-NEXT:    [[A5:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5), align 2
+; CHECK-NEXT:    [[A6:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6), align 2
+; CHECK-NEXT:    [[A7:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7), align 2
+; CHECK-NEXT:    [[A8:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; CHECK-NEXT:    [[A9:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9), align 2
+; CHECK-NEXT:    [[A10:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+; CHECK-NEXT:    [[A11:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+; CHECK-NEXT:    [[A12:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+; CHECK-NEXT:    [[A13:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+; CHECK-NEXT:    [[A14:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+; CHECK-NEXT:    [[A15:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+; CHECK-NEXT:    [[A16:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; CHECK-NEXT:    [[A17:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+; CHECK-NEXT:    [[A18:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+; CHECK-NEXT:    [[A19:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+; CHECK-NEXT:    [[A20:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+; CHECK-NEXT:    [[A21:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+; CHECK-NEXT:    [[A22:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+; CHECK-NEXT:    [[A23:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+; CHECK-NEXT:    [[A24:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; CHECK-NEXT:    [[A25:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+; CHECK-NEXT:    [[A26:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+; CHECK-NEXT:    [[A27:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+; CHECK-NEXT:    [[A28:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+; CHECK-NEXT:    [[A29:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+; CHECK-NEXT:    [[A30:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+; CHECK-NEXT:    [[A31:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+; CHECK-NEXT:    [[B0:%.*]] = load i16, ptr @b16, align 2
+; CHECK-NEXT:    [[B1:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1), align 2
+; CHECK-NEXT:    [[B2:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2), align 2
+; CHECK-NEXT:    [[B3:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3), align 2
+; CHECK-NEXT:    [[B4:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4), align 2
+; CHECK-NEXT:    [[B5:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5), align 2
+; CHECK-NEXT:    [[B6:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6), align 2
+; CHECK-NEXT:    [[B7:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7), align 2
+; CHECK-NEXT:    [[B8:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
+; CHECK-NEXT:    [[B9:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9), align 2
+; CHECK-NEXT:    [[B10:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+; CHECK-NEXT:    [[B11:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+; CHECK-NEXT:    [[B12:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+; CHECK-NEXT:    [[B13:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+; CHECK-NEXT:    [[B14:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+; CHECK-NEXT:    [[B15:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+; CHECK-NEXT:    [[B16:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+; CHECK-NEXT:    [[B17:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+; CHECK-NEXT:    [[B18:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+; CHECK-NEXT:    [[B19:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+; CHECK-NEXT:    [[B20:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+; CHECK-NEXT:    [[B21:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+; CHECK-NEXT:    [[B22:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+; CHECK-NEXT:    [[B23:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+; CHECK-NEXT:    [[B24:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+; CHECK-NEXT:    [[B25:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+; CHECK-NEXT:    [[B26:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+; CHECK-NEXT:    [[B27:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+; CHECK-NEXT:    [[B28:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+; CHECK-NEXT:    [[B29:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+; CHECK-NEXT:    [[B30:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+; CHECK-NEXT:    [[B31:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
 ; CHECK-NEXT:    [[C0:%.*]] = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 [[A0]], i16 [[B0]])
 ; CHECK-NEXT:    [[C1:%.*]] = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 [[A1]], i16 [[B1]])
 ; CHECK-NEXT:    [[C2:%.*]] = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 [[A2]], i16 [[B2]])
@@ -409,104 +409,104 @@ define void @sub_v32i16() {
 ; CHECK-NEXT:    [[R29:%.*]] = extractvalue { i16, i1 } [[C29]], 0
 ; CHECK-NEXT:    [[R30:%.*]] = extractvalue { i16, i1 } [[C30]], 0
 ; CHECK-NEXT:    [[R31:%.*]] = extractvalue { i16, i1 } [[C31]], 0
-; CHECK-NEXT:    store i16 [[R0]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0), align 2
-; CHECK-NEXT:    store i16 [[R1]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1), align 2
-; CHECK-NEXT:    store i16 [[R2]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2), align 2
-; CHECK-NEXT:    store i16 [[R3]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3), align 2
-; CHECK-NEXT:    store i16 [[R4]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4), align 2
-; CHECK-NEXT:    store i16 [[R5]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5), align 2
-; CHECK-NEXT:    store i16 [[R6]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6), align 2
-; CHECK-NEXT:    store i16 [[R7]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7), align 2
-; CHECK-NEXT:    store i16 [[R8]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8), align 2
-; CHECK-NEXT:    store i16 [[R9]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9), align 2
-; CHECK-NEXT:    store i16 [[R10]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-; CHECK-NEXT:    store i16 [[R11]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-; CHECK-NEXT:    store i16 [[R12]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-; CHECK-NEXT:    store i16 [[R13]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-; CHECK-NEXT:    store i16 [[R14]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-; CHECK-NEXT:    store i16 [[R15]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-; CHECK-NEXT:    store i16 [[R16]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-; CHECK-NEXT:    store i16 [[R17]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-; CHECK-NEXT:    store i16 [[R18]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-; CHECK-NEXT:    store i16 [[R19]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-; CHECK-NEXT:    store i16 [[R20]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-; CHECK-NEXT:    store i16 [[R21]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-; CHECK-NEXT:    store i16 [[R22]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-; CHECK-NEXT:    store i16 [[R23]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-; CHECK-NEXT:    store i16 [[R24]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-; CHECK-NEXT:    store i16 [[R25]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-; CHECK-NEXT:    store i16 [[R26]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-; CHECK-NEXT:    store i16 [[R27]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-; CHECK-NEXT:    store i16 [[R28]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-; CHECK-NEXT:    store i16 [[R29]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-; CHECK-NEXT:    store i16 [[R30]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-; CHECK-NEXT:    store i16 [[R31]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+; CHECK-NEXT:    store i16 [[R0]], ptr @c16, align 2
+; CHECK-NEXT:    store i16 [[R1]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1), align 2
+; CHECK-NEXT:    store i16 [[R2]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2), align 2
+; CHECK-NEXT:    store i16 [[R3]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3), align 2
+; CHECK-NEXT:    store i16 [[R4]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4), align 2
+; CHECK-NEXT:    store i16 [[R5]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5), align 2
+; CHECK-NEXT:    store i16 [[R6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6), align 2
+; CHECK-NEXT:    store i16 [[R7]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7), align 2
+; CHECK-NEXT:    store i16 [[R8]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; CHECK-NEXT:    store i16 [[R9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9), align 2
+; CHECK-NEXT:    store i16 [[R10]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+; CHECK-NEXT:    store i16 [[R11]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+; CHECK-NEXT:    store i16 [[R12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+; CHECK-NEXT:    store i16 [[R13]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+; CHECK-NEXT:    store i16 [[R14]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+; CHECK-NEXT:    store i16 [[R15]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+; CHECK-NEXT:    store i16 [[R16]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; CHECK-NEXT:    store i16 [[R17]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+; CHECK-NEXT:    store i16 [[R18]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+; CHECK-NEXT:    store i16 [[R19]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+; CHECK-NEXT:    store i16 [[R20]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+; CHECK-NEXT:    store i16 [[R21]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+; CHECK-NEXT:    store i16 [[R22]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+; CHECK-NEXT:    store i16 [[R23]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+; CHECK-NEXT:    store i16 [[R24]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+; CHECK-NEXT:    store i16 [[R25]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+; CHECK-NEXT:    store i16 [[R26]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+; CHECK-NEXT:    store i16 [[R27]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+; CHECK-NEXT:    store i16 [[R28]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+; CHECK-NEXT:    store i16 [[R29]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+; CHECK-NEXT:    store i16 [[R30]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+; CHECK-NEXT:    store i16 [[R31]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
 ; CHECK-NEXT:    ret void
 ;
-  %a0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
-  %a1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
-  %a2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
-  %a3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
-  %a4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
-  %a5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
-  %a6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
-  %a7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
-  %a8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
-  %a9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
-  %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-  %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-  %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-  %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-  %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-  %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-  %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-  %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-  %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-  %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-  %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-  %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-  %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-  %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-  %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-  %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-  %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-  %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-  %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-  %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-  %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-  %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-  %b0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
-  %b1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
-  %b2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
-  %b3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
-  %b4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
-  %b5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
-  %b6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
-  %b7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
-  %b8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
-  %b9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
-  %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-  %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-  %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-  %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-  %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-  %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-  %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-  %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-  %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-  %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-  %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-  %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-  %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-  %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-  %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-  %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-  %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-  %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-  %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-  %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-  %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-  %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+  %a0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+  %a1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+  %a2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+  %a3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+  %a4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+  %a5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+  %a6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+  %a7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+  %a8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+  %a9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+  %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+  %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+  %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+  %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+  %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+  %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+  %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+  %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+  %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+  %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+  %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+  %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+  %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+  %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+  %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+  %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+  %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+  %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+  %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+  %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+  %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+  %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+  %b0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+  %b1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+  %b2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+  %b3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+  %b4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+  %b5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+  %b6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+  %b7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+  %b8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+  %b9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+  %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+  %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+  %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+  %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+  %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+  %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+  %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+  %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+  %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+  %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+  %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+  %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+  %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+  %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+  %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+  %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+  %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+  %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+  %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+  %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+  %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+  %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
   %c0  = call {i16, i1} @llvm.ssub.with.overflow.i16(i16 %a0 , i16 %b0 )
   %c1  = call {i16, i1} @llvm.ssub.with.overflow.i16(i16 %a1 , i16 %b1 )
   %c2  = call {i16, i1} @llvm.ssub.with.overflow.i16(i16 %a2 , i16 %b2 )
@@ -571,171 +571,171 @@ define void @sub_v32i16() {
   %r29 = extractvalue {i16, i1} %c29, 0
   %r30 = extractvalue {i16, i1} %c30, 0
   %r31 = extractvalue {i16, i1} %c31, 0
-  store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
-  store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
-  store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
-  store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
-  store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
-  store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
-  store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
-  store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
-  store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
-  store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
-  store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-  store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-  store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-  store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-  store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-  store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-  store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-  store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-  store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-  store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-  store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-  store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-  store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-  store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-  store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-  store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-  store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-  store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-  store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-  store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-  store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-  store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+  store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+  store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+  store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+  store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+  store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+  store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+  store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+  store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+  store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+  store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+  store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+  store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+  store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+  store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+  store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+  store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+  store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+  store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+  store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+  store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+  store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+  store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+  store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+  store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+  store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+  store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+  store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+  store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+  store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+  store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+  store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+  store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
   ret void
 }
 
 define void @sub_v64i8() {
 ; CHECK-LABEL: @sub_v64i8(
-; CHECK-NEXT:    [[A0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0), align 1
-; CHECK-NEXT:    [[A1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1), align 1
-; CHECK-NEXT:    [[A2:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2), align 1
-; CHECK-NEXT:    [[A3:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3), align 1
-; CHECK-NEXT:    [[A4:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4), align 1
-; CHECK-NEXT:    [[A5:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5), align 1
-; CHECK-NEXT:    [[A6:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6), align 1
-; CHECK-NEXT:    [[A7:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7), align 1
-; CHECK-NEXT:    [[A8:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8), align 1
-; CHECK-NEXT:    [[A9:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9), align 1
-; CHECK-NEXT:    [[A10:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-; CHECK-NEXT:    [[A11:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-; CHECK-NEXT:    [[A12:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-; CHECK-NEXT:    [[A13:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-; CHECK-NEXT:    [[A14:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-; CHECK-NEXT:    [[A15:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-; CHECK-NEXT:    [[A16:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-; CHECK-NEXT:    [[A17:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-; CHECK-NEXT:    [[A18:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-; CHECK-NEXT:    [[A19:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-; CHECK-NEXT:    [[A20:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-; CHECK-NEXT:    [[A21:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-; CHECK-NEXT:    [[A22:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-; CHECK-NEXT:    [[A23:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-; CHECK-NEXT:    [[A24:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-; CHECK-NEXT:    [[A25:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-; CHECK-NEXT:    [[A26:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-; CHECK-NEXT:    [[A27:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-; CHECK-NEXT:    [[A28:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-; CHECK-NEXT:    [[A29:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-; CHECK-NEXT:    [[A30:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-; CHECK-NEXT:    [[A31:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-; CHECK-NEXT:    [[A32:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-; CHECK-NEXT:    [[A33:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-; CHECK-NEXT:    [[A34:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-; CHECK-NEXT:    [[A35:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-; CHECK-NEXT:    [[A36:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-; CHECK-NEXT:    [[A37:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-; CHECK-NEXT:    [[A38:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-; CHECK-NEXT:    [[A39:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-; CHECK-NEXT:    [[A40:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-; CHECK-NEXT:    [[A41:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-; CHECK-NEXT:    [[A42:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-; CHECK-NEXT:    [[A43:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-; CHECK-NEXT:    [[A44:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-; CHECK-NEXT:    [[A45:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-; CHECK-NEXT:    [[A46:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-; CHECK-NEXT:    [[A47:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-; CHECK-NEXT:    [[A48:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-; CHECK-NEXT:    [[A49:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-; CHECK-NEXT:    [[A50:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-; CHECK-NEXT:    [[A51:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-; CHECK-NEXT:    [[A52:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-; CHECK-NEXT:    [[A53:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-; CHECK-NEXT:    [[A54:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-; CHECK-NEXT:    [[A55:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-; CHECK-NEXT:    [[A56:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-; CHECK-NEXT:    [[A57:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-; CHECK-NEXT:    [[A58:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-; CHECK-NEXT:    [[A59:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-; CHECK-NEXT:    [[A60:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-; CHECK-NEXT:    [[A61:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-; CHECK-NEXT:    [[A62:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-; CHECK-NEXT:    [[A63:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-; CHECK-NEXT:    [[B0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0), align 1
-; CHECK-NEXT:    [[B1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1), align 1
-; CHECK-NEXT:    [[B2:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2), align 1
-; CHECK-NEXT:    [[B3:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3), align 1
-; CHECK-NEXT:    [[B4:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4), align 1
-; CHECK-NEXT:    [[B5:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5), align 1
-; CHECK-NEXT:    [[B6:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6), align 1
-; CHECK-NEXT:    [[B7:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7), align 1
-; CHECK-NEXT:    [[B8:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8), align 1
-; CHECK-NEXT:    [[B9:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9), align 1
-; CHECK-NEXT:    [[B10:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-; CHECK-NEXT:    [[B11:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-; CHECK-NEXT:    [[B12:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-; CHECK-NEXT:    [[B13:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-; CHECK-NEXT:    [[B14:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-; CHECK-NEXT:    [[B15:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-; CHECK-NEXT:    [[B16:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-; CHECK-NEXT:    [[B17:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-; CHECK-NEXT:    [[B18:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-; CHECK-NEXT:    [[B19:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-; CHECK-NEXT:    [[B20:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-; CHECK-NEXT:    [[B21:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-; CHECK-NEXT:    [[B22:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-; CHECK-NEXT:    [[B23:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-; CHECK-NEXT:    [[B24:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-; CHECK-NEXT:    [[B25:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-; CHECK-NEXT:    [[B26:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-; CHECK-NEXT:    [[B27:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-; CHECK-NEXT:    [[B28:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-; CHECK-NEXT:    [[B29:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-; CHECK-NEXT:    [[B30:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-; CHECK-NEXT:    [[B31:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-; CHECK-NEXT:    [[B32:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-; CHECK-NEXT:    [[B33:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-; CHECK-NEXT:    [[B34:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-; CHECK-NEXT:    [[B35:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-; CHECK-NEXT:    [[B36:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-; CHECK-NEXT:    [[B37:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-; CHECK-NEXT:    [[B38:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-; CHECK-NEXT:    [[B39:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-; CHECK-NEXT:    [[B40:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-; CHECK-NEXT:    [[B41:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-; CHECK-NEXT:    [[B42:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-; CHECK-NEXT:    [[B43:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-; CHECK-NEXT:    [[B44:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-; CHECK-NEXT:    [[B45:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-; CHECK-NEXT:    [[B46:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-; CHECK-NEXT:    [[B47:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-; CHECK-NEXT:    [[B48:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-; CHECK-NEXT:    [[B49:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-; CHECK-NEXT:    [[B50:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-; CHECK-NEXT:    [[B51:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-; CHECK-NEXT:    [[B52:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-; CHECK-NEXT:    [[B53:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-; CHECK-NEXT:    [[B54:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-; CHECK-NEXT:    [[B55:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-; CHECK-NEXT:    [[B56:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-; CHECK-NEXT:    [[B57:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-; CHECK-NEXT:    [[B58:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-; CHECK-NEXT:    [[B59:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-; CHECK-NEXT:    [[B60:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-; CHECK-NEXT:    [[B61:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-; CHECK-NEXT:    [[B62:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-; CHECK-NEXT:    [[B63:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+; CHECK-NEXT:    [[A0:%.*]] = load i8, ptr @a8, align 1
+; CHECK-NEXT:    [[A1:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1), align 1
+; CHECK-NEXT:    [[A2:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2), align 1
+; CHECK-NEXT:    [[A3:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3), align 1
+; CHECK-NEXT:    [[A4:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4), align 1
+; CHECK-NEXT:    [[A5:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5), align 1
+; CHECK-NEXT:    [[A6:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6), align 1
+; CHECK-NEXT:    [[A7:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7), align 1
+; CHECK-NEXT:    [[A8:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8), align 1
+; CHECK-NEXT:    [[A9:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9), align 1
+; CHECK-NEXT:    [[A10:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+; CHECK-NEXT:    [[A11:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+; CHECK-NEXT:    [[A12:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+; CHECK-NEXT:    [[A13:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+; CHECK-NEXT:    [[A14:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+; CHECK-NEXT:    [[A15:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+; CHECK-NEXT:    [[A16:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; CHECK-NEXT:    [[A17:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+; CHECK-NEXT:    [[A18:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+; CHECK-NEXT:    [[A19:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+; CHECK-NEXT:    [[A20:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+; CHECK-NEXT:    [[A21:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+; CHECK-NEXT:    [[A22:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+; CHECK-NEXT:    [[A23:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+; CHECK-NEXT:    [[A24:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+; CHECK-NEXT:    [[A25:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+; CHECK-NEXT:    [[A26:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+; CHECK-NEXT:    [[A27:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+; CHECK-NEXT:    [[A28:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+; CHECK-NEXT:    [[A29:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+; CHECK-NEXT:    [[A30:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+; CHECK-NEXT:    [[A31:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+; CHECK-NEXT:    [[A32:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; CHECK-NEXT:    [[A33:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+; CHECK-NEXT:    [[A34:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+; CHECK-NEXT:    [[A35:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+; CHECK-NEXT:    [[A36:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+; CHECK-NEXT:    [[A37:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+; CHECK-NEXT:    [[A38:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+; CHECK-NEXT:    [[A39:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+; CHECK-NEXT:    [[A40:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+; CHECK-NEXT:    [[A41:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+; CHECK-NEXT:    [[A42:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+; CHECK-NEXT:    [[A43:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+; CHECK-NEXT:    [[A44:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+; CHECK-NEXT:    [[A45:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+; CHECK-NEXT:    [[A46:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+; CHECK-NEXT:    [[A47:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+; CHECK-NEXT:    [[A48:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; CHECK-NEXT:    [[A49:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+; CHECK-NEXT:    [[A50:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+; CHECK-NEXT:    [[A51:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+; CHECK-NEXT:    [[A52:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+; CHECK-NEXT:    [[A53:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+; CHECK-NEXT:    [[A54:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+; CHECK-NEXT:    [[A55:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+; CHECK-NEXT:    [[A56:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+; CHECK-NEXT:    [[A57:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+; CHECK-NEXT:    [[A58:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+; CHECK-NEXT:    [[A59:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+; CHECK-NEXT:    [[A60:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+; CHECK-NEXT:    [[A61:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+; CHECK-NEXT:    [[A62:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+; CHECK-NEXT:    [[A63:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+; CHECK-NEXT:    [[B0:%.*]] = load i8, ptr @b8, align 1
+; CHECK-NEXT:    [[B1:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1), align 1
+; CHECK-NEXT:    [[B2:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2), align 1
+; CHECK-NEXT:    [[B3:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3), align 1
+; CHECK-NEXT:    [[B4:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4), align 1
+; CHECK-NEXT:    [[B5:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5), align 1
+; CHECK-NEXT:    [[B6:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6), align 1
+; CHECK-NEXT:    [[B7:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7), align 1
+; CHECK-NEXT:    [[B8:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8), align 1
+; CHECK-NEXT:    [[B9:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9), align 1
+; CHECK-NEXT:    [[B10:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+; CHECK-NEXT:    [[B11:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+; CHECK-NEXT:    [[B12:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+; CHECK-NEXT:    [[B13:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+; CHECK-NEXT:    [[B14:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+; CHECK-NEXT:    [[B15:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+; CHECK-NEXT:    [[B16:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+; CHECK-NEXT:    [[B17:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+; CHECK-NEXT:    [[B18:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+; CHECK-NEXT:    [[B19:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+; CHECK-NEXT:    [[B20:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+; CHECK-NEXT:    [[B21:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+; CHECK-NEXT:    [[B22:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+; CHECK-NEXT:    [[B23:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+; CHECK-NEXT:    [[B24:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+; CHECK-NEXT:    [[B25:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+; CHECK-NEXT:    [[B26:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+; CHECK-NEXT:    [[B27:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+; CHECK-NEXT:    [[B28:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+; CHECK-NEXT:    [[B29:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+; CHECK-NEXT:    [[B30:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+; CHECK-NEXT:    [[B31:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+; CHECK-NEXT:    [[B32:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+; CHECK-NEXT:    [[B33:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+; CHECK-NEXT:    [[B34:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+; CHECK-NEXT:    [[B35:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+; CHECK-NEXT:    [[B36:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+; CHECK-NEXT:    [[B37:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+; CHECK-NEXT:    [[B38:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+; CHECK-NEXT:    [[B39:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+; CHECK-NEXT:    [[B40:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+; CHECK-NEXT:    [[B41:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+; CHECK-NEXT:    [[B42:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+; CHECK-NEXT:    [[B43:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+; CHECK-NEXT:    [[B44:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+; CHECK-NEXT:    [[B45:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+; CHECK-NEXT:    [[B46:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+; CHECK-NEXT:    [[B47:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+; CHECK-NEXT:    [[B48:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+; CHECK-NEXT:    [[B49:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+; CHECK-NEXT:    [[B50:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+; CHECK-NEXT:    [[B51:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+; CHECK-NEXT:    [[B52:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+; CHECK-NEXT:    [[B53:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+; CHECK-NEXT:    [[B54:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+; CHECK-NEXT:    [[B55:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+; CHECK-NEXT:    [[B56:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+; CHECK-NEXT:    [[B57:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+; CHECK-NEXT:    [[B58:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+; CHECK-NEXT:    [[B59:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+; CHECK-NEXT:    [[B60:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+; CHECK-NEXT:    [[B61:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+; CHECK-NEXT:    [[B62:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+; CHECK-NEXT:    [[B63:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
 ; CHECK-NEXT:    [[C0:%.*]] = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[A0]], i8 [[B0]])
 ; CHECK-NEXT:    [[C1:%.*]] = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[A1]], i8 [[B1]])
 ; CHECK-NEXT:    [[C2:%.*]] = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[A2]], i8 [[B2]])
@@ -864,200 +864,200 @@ define void @sub_v64i8() {
 ; CHECK-NEXT:    [[R61:%.*]] = extractvalue { i8, i1 } [[C61]], 0
 ; CHECK-NEXT:    [[R62:%.*]] = extractvalue { i8, i1 } [[C62]], 0
 ; CHECK-NEXT:    [[R63:%.*]] = extractvalue { i8, i1 } [[C63]], 0
-; CHECK-NEXT:    store i8 [[R0]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0), align 1
-; CHECK-NEXT:    store i8 [[R1]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1), align 1
-; CHECK-NEXT:    store i8 [[R2]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2), align 1
-; CHECK-NEXT:    store i8 [[R3]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3), align 1
-; CHECK-NEXT:    store i8 [[R4]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4), align 1
-; CHECK-NEXT:    store i8 [[R5]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5), align 1
-; CHECK-NEXT:    store i8 [[R6]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6), align 1
-; CHECK-NEXT:    store i8 [[R7]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7), align 1
-; CHECK-NEXT:    store i8 [[R8]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8), align 1
-; CHECK-NEXT:    store i8 [[R9]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9), align 1
-; CHECK-NEXT:    store i8 [[R10]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-; CHECK-NEXT:    store i8 [[R11]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-; CHECK-NEXT:    store i8 [[R12]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-; CHECK-NEXT:    store i8 [[R13]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-; CHECK-NEXT:    store i8 [[R14]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-; CHECK-NEXT:    store i8 [[R15]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-; CHECK-NEXT:    store i8 [[R16]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-; CHECK-NEXT:    store i8 [[R17]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-; CHECK-NEXT:    store i8 [[R18]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-; CHECK-NEXT:    store i8 [[R19]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-; CHECK-NEXT:    store i8 [[R20]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-; CHECK-NEXT:    store i8 [[R21]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-; CHECK-NEXT:    store i8 [[R22]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-; CHECK-NEXT:    store i8 [[R23]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-; CHECK-NEXT:    store i8 [[R24]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-; CHECK-NEXT:    store i8 [[R25]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-; CHECK-NEXT:    store i8 [[R26]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-; CHECK-NEXT:    store i8 [[R27]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-; CHECK-NEXT:    store i8 [[R28]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-; CHECK-NEXT:    store i8 [[R29]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-; CHECK-NEXT:    store i8 [[R30]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-; CHECK-NEXT:    store i8 [[R31]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-; CHECK-NEXT:    store i8 [[R32]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-; CHECK-NEXT:    store i8 [[R33]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-; CHECK-NEXT:    store i8 [[R34]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-; CHECK-NEXT:    store i8 [[R35]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-; CHECK-NEXT:    store i8 [[R36]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-; CHECK-NEXT:    store i8 [[R37]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-; CHECK-NEXT:    store i8 [[R38]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-; CHECK-NEXT:    store i8 [[R39]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-; CHECK-NEXT:    store i8 [[R40]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-; CHECK-NEXT:    store i8 [[R41]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-; CHECK-NEXT:    store i8 [[R42]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-; CHECK-NEXT:    store i8 [[R43]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-; CHECK-NEXT:    store i8 [[R44]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-; CHECK-NEXT:    store i8 [[R45]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-; CHECK-NEXT:    store i8 [[R46]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-; CHECK-NEXT:    store i8 [[R47]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-; CHECK-NEXT:    store i8 [[R48]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-; CHECK-NEXT:    store i8 [[R49]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-; CHECK-NEXT:    store i8 [[R50]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-; CHECK-NEXT:    store i8 [[R51]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-; CHECK-NEXT:    store i8 [[R52]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-; CHECK-NEXT:    store i8 [[R53]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-; CHECK-NEXT:    store i8 [[R54]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-; CHECK-NEXT:    store i8 [[R55]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-; CHECK-NEXT:    store i8 [[R56]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-; CHECK-NEXT:    store i8 [[R57]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-; CHECK-NEXT:    store i8 [[R58]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-; CHECK-NEXT:    store i8 [[R59]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-; CHECK-NEXT:    store i8 [[R60]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-; CHECK-NEXT:    store i8 [[R61]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-; CHECK-NEXT:    store i8 [[R62]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-; CHECK-NEXT:    store i8 [[R63]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+; CHECK-NEXT:    store i8 [[R0]], ptr @c8, align 1
+; CHECK-NEXT:    store i8 [[R1]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1), align 1
+; CHECK-NEXT:    store i8 [[R2]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2), align 1
+; CHECK-NEXT:    store i8 [[R3]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3), align 1
+; CHECK-NEXT:    store i8 [[R4]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4), align 1
+; CHECK-NEXT:    store i8 [[R5]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5), align 1
+; CHECK-NEXT:    store i8 [[R6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6), align 1
+; CHECK-NEXT:    store i8 [[R7]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7), align 1
+; CHECK-NEXT:    store i8 [[R8]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8), align 1
+; CHECK-NEXT:    store i8 [[R9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9), align 1
+; CHECK-NEXT:    store i8 [[R10]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+; CHECK-NEXT:    store i8 [[R11]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+; CHECK-NEXT:    store i8 [[R12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+; CHECK-NEXT:    store i8 [[R13]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+; CHECK-NEXT:    store i8 [[R14]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+; CHECK-NEXT:    store i8 [[R15]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+; CHECK-NEXT:    store i8 [[R16]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; CHECK-NEXT:    store i8 [[R17]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+; CHECK-NEXT:    store i8 [[R18]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+; CHECK-NEXT:    store i8 [[R19]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+; CHECK-NEXT:    store i8 [[R20]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+; CHECK-NEXT:    store i8 [[R21]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+; CHECK-NEXT:    store i8 [[R22]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+; CHECK-NEXT:    store i8 [[R23]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+; CHECK-NEXT:    store i8 [[R24]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+; CHECK-NEXT:    store i8 [[R25]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+; CHECK-NEXT:    store i8 [[R26]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+; CHECK-NEXT:    store i8 [[R27]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+; CHECK-NEXT:    store i8 [[R28]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+; CHECK-NEXT:    store i8 [[R29]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+; CHECK-NEXT:    store i8 [[R30]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+; CHECK-NEXT:    store i8 [[R31]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+; CHECK-NEXT:    store i8 [[R32]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; CHECK-NEXT:    store i8 [[R33]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+; CHECK-NEXT:    store i8 [[R34]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+; CHECK-NEXT:    store i8 [[R35]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+; CHECK-NEXT:    store i8 [[R36]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+; CHECK-NEXT:    store i8 [[R37]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+; CHECK-NEXT:    store i8 [[R38]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+; CHECK-NEXT:    store i8 [[R39]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+; CHECK-NEXT:    store i8 [[R40]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+; CHECK-NEXT:    store i8 [[R41]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+; CHECK-NEXT:    store i8 [[R42]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+; CHECK-NEXT:    store i8 [[R43]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+; CHECK-NEXT:    store i8 [[R44]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+; CHECK-NEXT:    store i8 [[R45]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+; CHECK-NEXT:    store i8 [[R46]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+; CHECK-NEXT:    store i8 [[R47]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+; CHECK-NEXT:    store i8 [[R48]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+; CHECK-NEXT:    store i8 [[R49]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+; CHECK-NEXT:    store i8 [[R50]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+; CHECK-NEXT:    store i8 [[R51]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+; CHECK-NEXT:    store i8 [[R52]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+; CHECK-NEXT:    store i8 [[R53]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+; CHECK-NEXT:    store i8 [[R54]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+; CHECK-NEXT:    store i8 [[R55]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+; CHECK-NEXT:    store i8 [[R56]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+; CHECK-NEXT:    store i8 [[R57]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+; CHECK-NEXT:    store i8 [[R58]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+; CHECK-NEXT:    store i8 [[R59]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+; CHECK-NEXT:    store i8 [[R60]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+; CHECK-NEXT:    store i8 [[R61]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+; CHECK-NEXT:    store i8 [[R62]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+; CHECK-NEXT:    store i8 [[R63]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
 ; CHECK-NEXT:    ret void
 ;
-  %a0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
-  %a1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
-  %a2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
-  %a3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
-  %a4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
-  %a5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
-  %a6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
-  %a7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
-  %a8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
-  %a9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
-  %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-  %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-  %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-  %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-  %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-  %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-  %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-  %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-  %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-  %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-  %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-  %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-  %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-  %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-  %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-  %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-  %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-  %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-  %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-  %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-  %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-  %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-  %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-  %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-  %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-  %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-  %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-  %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-  %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-  %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-  %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-  %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-  %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-  %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-  %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-  %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-  %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-  %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-  %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-  %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-  %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-  %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-  %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-  %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-  %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-  %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-  %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-  %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-  %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-  %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-  %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-  %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-  %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-  %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-  %b0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
-  %b1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
-  %b2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
-  %b3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
-  %b4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
-  %b5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
-  %b6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
-  %b7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
-  %b8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
-  %b9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
-  %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-  %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-  %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-  %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-  %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-  %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-  %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-  %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-  %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-  %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-  %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-  %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-  %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-  %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-  %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-  %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-  %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-  %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-  %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-  %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-  %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-  %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-  %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-  %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-  %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-  %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-  %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-  %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-  %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-  %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-  %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-  %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-  %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-  %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-  %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-  %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-  %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-  %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-  %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-  %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-  %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-  %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-  %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-  %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-  %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-  %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-  %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-  %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-  %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-  %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-  %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-  %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-  %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-  %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+  %a0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+  %a1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+  %a2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+  %a3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+  %a4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+  %a5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+  %a6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+  %a7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+  %a8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+  %a9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+  %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+  %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+  %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+  %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+  %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+  %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+  %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+  %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+  %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+  %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+  %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+  %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+  %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+  %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+  %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+  %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+  %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+  %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+  %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+  %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+  %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+  %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+  %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+  %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+  %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+  %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+  %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+  %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+  %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+  %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+  %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+  %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+  %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+  %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+  %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+  %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+  %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+  %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+  %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+  %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+  %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+  %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+  %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+  %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+  %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+  %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+  %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+  %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+  %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+  %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+  %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+  %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+  %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+  %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+  %b0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+  %b1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+  %b2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+  %b3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+  %b4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+  %b5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+  %b6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+  %b7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+  %b8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+  %b9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+  %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+  %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+  %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+  %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+  %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+  %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+  %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+  %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+  %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+  %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+  %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+  %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+  %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+  %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+  %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+  %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+  %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+  %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+  %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+  %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+  %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+  %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+  %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+  %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+  %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+  %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+  %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+  %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+  %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+  %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+  %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+  %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+  %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+  %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+  %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+  %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+  %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+  %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+  %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+  %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+  %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+  %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+  %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+  %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+  %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+  %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+  %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+  %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+  %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+  %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+  %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+  %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+  %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+  %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
   %c0  = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 %a0 , i8 %b0 )
   %c1  = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 %a1 , i8 %b1 )
   %c2  = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 %a2 , i8 %b2 )
@@ -1186,69 +1186,69 @@ define void @sub_v64i8() {
   %r61 = extractvalue {i8, i1} %c61, 0
   %r62 = extractvalue {i8, i1} %c62, 0
   %r63 = extractvalue {i8, i1} %c63, 0
-  store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
-  store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
-  store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
-  store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
-  store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
-  store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
-  store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
-  store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
-  store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
-  store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
-  store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-  store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-  store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-  store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-  store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-  store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-  store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-  store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-  store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-  store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-  store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-  store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-  store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-  store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-  store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-  store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-  store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-  store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-  store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-  store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-  store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-  store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-  store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-  store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-  store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-  store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-  store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-  store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-  store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-  store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-  store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-  store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-  store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-  store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-  store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-  store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-  store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-  store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-  store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-  store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-  store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-  store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-  store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-  store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-  store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-  store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-  store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-  store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-  store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-  store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-  store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-  store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-  store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-  store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+  store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+  store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+  store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+  store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+  store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+  store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+  store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+  store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+  store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+  store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+  store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+  store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+  store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+  store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+  store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+  store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+  store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+  store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+  store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+  store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+  store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+  store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+  store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+  store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+  store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+  store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+  store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+  store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+  store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+  store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+  store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+  store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+  store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+  store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+  store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+  store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+  store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+  store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+  store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+  store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+  store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+  store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+  store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+  store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+  store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+  store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+  store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+  store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+  store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+  store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+  store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+  store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+  store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+  store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+  store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+  store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+  store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+  store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+  store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+  store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+  store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+  store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+  store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+  store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
   ret void
 }

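(The rewrite in these hunks is mechanical, but two patterns recur. As a
minimal sketch -- @g, %x and %v below are illustrative names standing in
for globals like @a64 above, not taken from any test in this patch:

  @g = common global [8 x i64] zeroinitializer, align 8

  ; Pre-patch (typed pointers): the pointee type is spelled twice, and a
  ; vector access needs an explicit constant bitcast of the global:
  ;   %x = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @g, i32 0, i64 1), align 8
  ;   %v = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @g to <4 x i64>*), align 8
  ;
  ; Post-patch (opaque pointers): every pointer operand is just `ptr`,
  ; and the bitcast disappears since all pointers share one type:
  %x = load i64, ptr getelementptr inbounds ([8 x i64], ptr @g, i32 0, i64 1), align 8
  %v = load <4 x i64>, ptr @g, align 8

Zero-index GEP constants likewise fold to the bare global when the CHECK
lines are regenerated, which is why "load i64, ptr @a64" appears below
where the typed form spelled out "getelementptr inbounds (..., i32 0, i64 0)".)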
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usat.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usat.ll
index 6e0502d1c4355..3510863c88930 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usat.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usat.ll
@@ -27,22 +27,22 @@ declare i8  @llvm.usub.sat.i8 (i8 , i8 )
 
 define void @sub_v8i64() {
 ; SSE-LABEL: @sub_v8i64(
-; SSE-NEXT:    [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-; SSE-NEXT:    [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-; SSE-NEXT:    [[A2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-; SSE-NEXT:    [[A3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-; SSE-NEXT:    [[A4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-; SSE-NEXT:    [[A5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-; SSE-NEXT:    [[A6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-; SSE-NEXT:    [[A7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-; SSE-NEXT:    [[B0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-; SSE-NEXT:    [[B1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-; SSE-NEXT:    [[B2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-; SSE-NEXT:    [[B3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-; SSE-NEXT:    [[B4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-; SSE-NEXT:    [[B5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-; SSE-NEXT:    [[B6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-; SSE-NEXT:    [[B7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+; SSE-NEXT:    [[A0:%.*]] = load i64, ptr @a64, align 8
+; SSE-NEXT:    [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+; SSE-NEXT:    [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+; SSE-NEXT:    [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+; SSE-NEXT:    [[B0:%.*]] = load i64, ptr @b64, align 8
+; SSE-NEXT:    [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+; SSE-NEXT:    [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+; SSE-NEXT:    [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
 ; SSE-NEXT:    [[R0:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A0]], i64 [[B0]])
 ; SSE-NEXT:    [[R1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A1]], i64 [[B1]])
 ; SSE-NEXT:    [[R2:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A2]], i64 [[B2]])
@@ -51,50 +51,50 @@ define void @sub_v8i64() {
 ; SSE-NEXT:    [[R5:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A5]], i64 [[B5]])
 ; SSE-NEXT:    [[R6:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A6]], i64 [[B6]])
 ; SSE-NEXT:    [[R7:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A7]], i64 [[B7]])
-; SSE-NEXT:    store i64 [[R0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-; SSE-NEXT:    store i64 [[R1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-; SSE-NEXT:    store i64 [[R2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-; SSE-NEXT:    store i64 [[R3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-; SSE-NEXT:    store i64 [[R4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-; SSE-NEXT:    store i64 [[R5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-; SSE-NEXT:    store i64 [[R6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-; SSE-NEXT:    store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+; SSE-NEXT:    store i64 [[R0]], ptr @c64, align 8
+; SSE-NEXT:    store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+; SSE-NEXT:    store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SSE-NEXT:    store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+; SSE-NEXT:    store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SSE-NEXT:    store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+; SSE-NEXT:    store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+; SSE-NEXT:    store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @sub_v8i64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
 ; AVX-NEXT:    [[TMP3:%.*]] = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]])
-; AVX-NEXT:    store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP3]], ptr @c64, align 8
+; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX-NEXT:    [[TMP6:%.*]] = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]])
-; AVX-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sub_v8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
+; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]])
-; AVX512-NEXT:    store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
+; AVX512-NEXT:    store <8 x i64> [[TMP3]], ptr @c64, align 8
 ; AVX512-NEXT:    ret void
 ;
-  %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-  %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-  %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-  %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-  %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-  %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-  %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-  %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-  %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-  %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-  %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-  %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-  %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-  %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-  %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-  %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+  %a0 = load i64, ptr @a64, align 8
+  %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+  %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+  %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+  %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+  %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+  %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+  %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+  %b0 = load i64, ptr @b64, align 8
+  %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+  %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+  %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+  %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+  %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+  %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+  %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
   %r0 = call i64 @llvm.usub.sat.i64(i64 %a0, i64 %b0)
   %r1 = call i64 @llvm.usub.sat.i64(i64 %a1, i64 %b1)
   %r2 = call i64 @llvm.usub.sat.i64(i64 %a2, i64 %b2)
@@ -103,87 +103,87 @@ define void @sub_v8i64() {
   %r5 = call i64 @llvm.usub.sat.i64(i64 %a5, i64 %b5)
   %r6 = call i64 @llvm.usub.sat.i64(i64 %a6, i64 %b6)
   %r7 = call i64 @llvm.usub.sat.i64(i64 %a7, i64 %b7)
-  store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-  store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-  store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-  store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-  store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-  store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-  store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-  store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+  store i64 %r0, ptr @c64, align 8
+  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @sub_v16i32() {
 ; SSE-LABEL: @sub_v16i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-; SSE-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP3]], ptr @c32, align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]])
-; SSE-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP9:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]])
-; SSE-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP12:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]])
-; SSE-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @sub_v16i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
 ; AVX-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
-; AVX-NEXT:    store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP3]], ptr @c32, align 4
+; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; AVX-NEXT:    [[TMP6:%.*]] = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> [[TMP4]], <8 x i32> [[TMP5]])
-; AVX-NEXT:    store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sub_v16i32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @b32 to <16 x i32>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
-; AVX512-NEXT:    store <16 x i32> [[TMP3]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4
+; AVX512-NEXT:    store <16 x i32> [[TMP3]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-  %b0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
-  %b1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
-  %b2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
-  %b3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
-  %b4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
-  %b5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
-  %b6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
-  %b7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
-  %b8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
-  %b9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
-  %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-  %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-  %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-  %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-  %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-  %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+  %b0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+  %b1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+  %b2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+  %b3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+  %b4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+  %b5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+  %b6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+  %b7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+  %b8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+  %b9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+  %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+  %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+  %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+  %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+  %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+  %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
   %r0  = call i32 @llvm.usub.sat.i32(i32 %a0 , i32 %b0 )
   %r1  = call i32 @llvm.usub.sat.i32(i32 %a1 , i32 %b1 )
   %r2  = call i32 @llvm.usub.sat.i32(i32 %a2 , i32 %b2 )
@@ -200,127 +200,127 @@ define void @sub_v16i32() {
   %r13 = call i32 @llvm.usub.sat.i32(i32 %a13, i32 %b13)
   %r14 = call i32 @llvm.usub.sat.i32(i32 %a14, i32 %b14)
   %r15 = call i32 @llvm.usub.sat.i32(i32 %a15, i32 %b15)
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
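
The conversion in the scalar body is purely mechanical: every typed pointer becomes the opaque ptr, so the pointee type is no longer repeated in the pointer operand of each load, store, and getelementptr constant. An illustrative before/after pair on a hypothetical global @g (not one of this test's arrays):

  ; typed pointers (removed lines): the pointee type is spelled twice
  %v = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @g, i32 0, i64 1), align 4
  ; opaque pointers (added lines): every pointer type is simply 'ptr'
  %v = load i32, ptr getelementptr inbounds ([4 x i32], ptr @g, i32 0, i64 1), align 4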
 
 define void @sub_v32i16() {
 ; SSE-LABEL: @sub_v32i16(
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
 ; SSE-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-; SSE-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
 ; SSE-NEXT:    [[TMP6:%.*]] = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]])
-; SSE-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; SSE-NEXT:    [[TMP9:%.*]] = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]])
-; SSE-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
 ; SSE-NEXT:    [[TMP12:%.*]] = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]])
-; SSE-NEXT:    store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @sub_v32i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
+; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
 ; AVX-NEXT:    [[TMP3:%.*]] = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
-; AVX-NEXT:    store <16 x i16> [[TMP3]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP3]], ptr @c16, align 2
+; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; AVX-NEXT:    [[TMP6:%.*]] = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> [[TMP4]], <16 x i16> [[TMP5]])
-; AVX-NEXT:    store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sub_v32i16(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @a16 to <32 x i16>*), align 2
-; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @b16 to <32 x i16>*), align 2
+; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
+; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
-; AVX512-NEXT:    store <32 x i16> [[TMP3]], <32 x i16>* bitcast ([32 x i16]* @c16 to <32 x i16>*), align 2
+; AVX512-NEXT:    store <32 x i16> [[TMP3]], ptr @c16, align 2
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
-  %a1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
-  %a2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
-  %a3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
-  %a4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
-  %a5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
-  %a6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
-  %a7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
-  %a8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
-  %a9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
-  %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-  %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-  %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-  %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-  %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-  %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-  %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-  %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-  %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-  %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-  %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-  %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-  %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-  %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-  %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-  %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-  %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-  %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-  %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-  %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-  %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-  %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-  %b0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
-  %b1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
-  %b2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
-  %b3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
-  %b4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
-  %b5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
-  %b6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
-  %b7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
-  %b8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
-  %b9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
-  %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-  %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-  %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-  %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-  %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-  %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-  %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-  %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-  %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-  %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-  %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-  %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-  %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-  %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-  %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-  %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-  %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-  %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-  %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-  %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-  %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-  %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+  %a0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+  %a1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+  %a2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+  %a3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+  %a4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+  %a5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+  %a6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+  %a7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+  %a8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+  %a9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+  %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+  %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+  %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+  %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+  %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+  %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+  %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+  %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+  %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+  %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+  %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+  %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+  %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+  %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+  %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+  %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+  %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+  %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+  %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+  %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+  %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+  %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+  %b0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+  %b1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+  %b2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+  %b3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+  %b4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+  %b5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+  %b6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+  %b7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+  %b8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+  %b9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+  %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+  %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+  %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+  %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+  %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+  %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+  %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+  %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+  %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+  %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+  %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+  %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+  %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+  %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+  %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+  %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+  %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+  %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+  %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+  %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+  %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+  %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
   %r0  = call i16 @llvm.usub.sat.i16(i16 %a0 , i16 %b0 )
   %r1  = call i16 @llvm.usub.sat.i16(i16 %a1 , i16 %b1 )
   %r2  = call i16 @llvm.usub.sat.i16(i16 %a2 , i16 %b2 )
@@ -353,207 +353,207 @@ define void @sub_v32i16() {
   %r29 = call i16 @llvm.usub.sat.i16(i16 %a29, i16 %b29)
   %r30 = call i16 @llvm.usub.sat.i16(i16 %a30, i16 %b30)
   %r31 = call i16 @llvm.usub.sat.i16(i16 %a31, i16 %b31)
-  store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
-  store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
-  store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
-  store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
-  store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
-  store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
-  store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
-  store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
-  store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
-  store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
-  store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-  store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-  store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-  store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-  store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-  store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-  store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-  store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-  store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-  store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-  store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-  store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-  store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-  store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-  store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-  store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-  store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-  store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-  store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-  store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-  store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-  store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+  store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+  store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+  store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+  store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+  store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+  store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+  store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+  store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+  store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+  store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+  store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+  store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+  store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+  store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+  store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+  store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+  store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+  store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+  store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+  store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+  store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+  store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+  store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+  store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+  store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+  store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+  store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+  store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+  store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+  store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+  store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+  store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
   ret void
 }
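
Note how the FileCheck expectations shrink along with the input: under typed pointers the vectorized accesses had to be written through bitcast constant expressions such as bitcast ([32 x i16]* @a16 to <8 x i16>*), while under opaque pointers a vector load or store can name the global directly. The AVX512 pair from the hunk above shows the simplification at its widest:

  typed:   load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @a16 to <32 x i16>*), align 2
  opaque:  load <32 x i16>, ptr @a16, align 2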
 
 define void @sub_v64i8() {
 ; SSE-LABEL: @sub_v64i8(
-; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-; SSE-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
 ; SSE-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
-; SSE-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; SSE-NEXT:    [[TMP9:%.*]] = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
-; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
 ; SSE-NEXT:    [[TMP12:%.*]] = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]])
-; SSE-NEXT:    store <16 x i8> [[TMP9]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @sub_v64i8(
-; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @a8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @b8 to <32 x i8>*), align 1
+; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> [[TMP1]], <32 x i8> [[TMP2]])
-; AVX-NEXT:    store <32 x i8> [[TMP3]], <32 x i8>* bitcast ([64 x i8]* @c8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP3]], ptr @c8, align 1
+; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; AVX-NEXT:    [[TMP6:%.*]] = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> [[TMP4]], <32 x i8> [[TMP5]])
-; AVX-NEXT:    store <32 x i8> [[TMP6]], <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sub_v64i8(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @a8 to <64 x i8>*), align 1
-; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @b8 to <64 x i8>*), align 1
+; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
+; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> [[TMP1]], <64 x i8> [[TMP2]])
-; AVX512-NEXT:    store <64 x i8> [[TMP3]], <64 x i8>* bitcast ([64 x i8]* @c8 to <64 x i8>*), align 1
+; AVX512-NEXT:    store <64 x i8> [[TMP3]], ptr @c8, align 1
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
-  %a1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
-  %a2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
-  %a3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
-  %a4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
-  %a5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
-  %a6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
-  %a7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
-  %a8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
-  %a9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
-  %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-  %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-  %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-  %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-  %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-  %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-  %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-  %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-  %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-  %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-  %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-  %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-  %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-  %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-  %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-  %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-  %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-  %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-  %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-  %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-  %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-  %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-  %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-  %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-  %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-  %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-  %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-  %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-  %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-  %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-  %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-  %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-  %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-  %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-  %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-  %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-  %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-  %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-  %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-  %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-  %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-  %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-  %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-  %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-  %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-  %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-  %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-  %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-  %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-  %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-  %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-  %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-  %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-  %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-  %b0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
-  %b1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
-  %b2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
-  %b3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
-  %b4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
-  %b5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
-  %b6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
-  %b7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
-  %b8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
-  %b9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
-  %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-  %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-  %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-  %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-  %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-  %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-  %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-  %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-  %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-  %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-  %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-  %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-  %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-  %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-  %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-  %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-  %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-  %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-  %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-  %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-  %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-  %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-  %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-  %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-  %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-  %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-  %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-  %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-  %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-  %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-  %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-  %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-  %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-  %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-  %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-  %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-  %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-  %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-  %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-  %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-  %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-  %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-  %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-  %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-  %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-  %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-  %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-  %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-  %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-  %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-  %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-  %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-  %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-  %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+  %a0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+  %a1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+  %a2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+  %a3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+  %a4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+  %a5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+  %a6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+  %a7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+  %a8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+  %a9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+  %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+  %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+  %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+  %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+  %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+  %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+  %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+  %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+  %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+  %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+  %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+  %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+  %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+  %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+  %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+  %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+  %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+  %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+  %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+  %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+  %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+  %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+  %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+  %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+  %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+  %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+  %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+  %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+  %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+  %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+  %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+  %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+  %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+  %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+  %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+  %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+  %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+  %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+  %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+  %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+  %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+  %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+  %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+  %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+  %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+  %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+  %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+  %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+  %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+  %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+  %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+  %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+  %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+  %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+  %b0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+  %b1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+  %b2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+  %b3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+  %b4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+  %b5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+  %b6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+  %b7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+  %b8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+  %b9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+  %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+  %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+  %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+  %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+  %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+  %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+  %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+  %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+  %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+  %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+  %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+  %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+  %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+  %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+  %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+  %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+  %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+  %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+  %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+  %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+  %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+  %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+  %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+  %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+  %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+  %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+  %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+  %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+  %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+  %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+  %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+  %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+  %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+  %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+  %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+  %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+  %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+  %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+  %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+  %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+  %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+  %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+  %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+  %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+  %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+  %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+  %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+  %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+  %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+  %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+  %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+  %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+  %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+  %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
   %r0  = call i8 @llvm.usub.sat.i8(i8 %a0 , i8 %b0 )
   %r1  = call i8 @llvm.usub.sat.i8(i8 %a1 , i8 %b1 )
   %r2  = call i8 @llvm.usub.sat.i8(i8 %a2 , i8 %b2 )
@@ -618,69 +618,69 @@ define void @sub_v64i8() {
   %r61 = call i8 @llvm.usub.sat.i8(i8 %a61, i8 %b61)
   %r62 = call i8 @llvm.usub.sat.i8(i8 %a62, i8 %b62)
   %r63 = call i8 @llvm.usub.sat.i8(i8 %a63, i8 %b63)
-  store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
-  store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
-  store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
-  store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
-  store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
-  store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
-  store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
-  store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
-  store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
-  store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
-  store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-  store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-  store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-  store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-  store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-  store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-  store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-  store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-  store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-  store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-  store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-  store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-  store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-  store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-  store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-  store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-  store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-  store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-  store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-  store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-  store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-  store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-  store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-  store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-  store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-  store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-  store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-  store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-  store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-  store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-  store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-  store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-  store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-  store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-  store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-  store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-  store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-  store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-  store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-  store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-  store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-  store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-  store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-  store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-  store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-  store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-  store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-  store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-  store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-  store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-  store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-  store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-  store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-  store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+  store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+  store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+  store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+  store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+  store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+  store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+  store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+  store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+  store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+  store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+  store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+  store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+  store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+  store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+  store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+  store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+  store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+  store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+  store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+  store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+  store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+  store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+  store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+  store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+  store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+  store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+  store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+  store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+  store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+  store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+  store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+  store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+  store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+  store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+  store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+  store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+  store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+  store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+  store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+  store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+  store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+  store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+  store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+  store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+  store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+  store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+  store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+  store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+  store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+  store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+  store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+  store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+  store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+  store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+  store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+  store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+  store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+  store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+  store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+  store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+  store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+  store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+  store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+  store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
   ret void
 }

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usubo.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usubo.ll
index caaaab9b11604..11a68a5dfbcca 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usubo.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usubo.ll
@@ -27,22 +27,22 @@ declare {i8 , i1} @llvm.usub.with.overflow.i8 (i8 , i8 )
 
 define void @sub_v8i64() {
 ; CHECK-LABEL: @sub_v8i64(
-; CHECK-NEXT:    [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-; CHECK-NEXT:    [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-; CHECK-NEXT:    [[A2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-; CHECK-NEXT:    [[A3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-; CHECK-NEXT:    [[A4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-; CHECK-NEXT:    [[A5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-; CHECK-NEXT:    [[A6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-; CHECK-NEXT:    [[A7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-; CHECK-NEXT:    [[B0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-; CHECK-NEXT:    [[B1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-; CHECK-NEXT:    [[B2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-; CHECK-NEXT:    [[B3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-; CHECK-NEXT:    [[B4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-; CHECK-NEXT:    [[B5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-; CHECK-NEXT:    [[B6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-; CHECK-NEXT:    [[B7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+; CHECK-NEXT:    [[A0:%.*]] = load i64, ptr @a64, align 8
+; CHECK-NEXT:    [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+; CHECK-NEXT:    [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; CHECK-NEXT:    [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+; CHECK-NEXT:    [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; CHECK-NEXT:    [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+; CHECK-NEXT:    [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; CHECK-NEXT:    [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+; CHECK-NEXT:    [[B0:%.*]] = load i64, ptr @b64, align 8
+; CHECK-NEXT:    [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+; CHECK-NEXT:    [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+; CHECK-NEXT:    [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+; CHECK-NEXT:    [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+; CHECK-NEXT:    [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+; CHECK-NEXT:    [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+; CHECK-NEXT:    [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
 ; CHECK-NEXT:    [[C0:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[A0]], i64 [[B0]])
 ; CHECK-NEXT:    [[C1:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[A1]], i64 [[B1]])
 ; CHECK-NEXT:    [[C2:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[A2]], i64 [[B2]])
@@ -59,32 +59,32 @@ define void @sub_v8i64() {
 ; CHECK-NEXT:    [[R5:%.*]] = extractvalue { i64, i1 } [[C5]], 0
 ; CHECK-NEXT:    [[R6:%.*]] = extractvalue { i64, i1 } [[C6]], 0
 ; CHECK-NEXT:    [[R7:%.*]] = extractvalue { i64, i1 } [[C7]], 0
-; CHECK-NEXT:    store i64 [[R0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-; CHECK-NEXT:    store i64 [[R1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-; CHECK-NEXT:    store i64 [[R2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-; CHECK-NEXT:    store i64 [[R3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-; CHECK-NEXT:    store i64 [[R4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-; CHECK-NEXT:    store i64 [[R5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-; CHECK-NEXT:    store i64 [[R6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-; CHECK-NEXT:    store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+; CHECK-NEXT:    store i64 [[R0]], ptr @c64, align 8
+; CHECK-NEXT:    store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+; CHECK-NEXT:    store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; CHECK-NEXT:    store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+; CHECK-NEXT:    store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; CHECK-NEXT:    store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+; CHECK-NEXT:    store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+; CHECK-NEXT:    store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-  %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-  %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-  %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-  %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-  %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-  %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-  %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-  %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-  %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-  %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-  %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-  %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-  %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-  %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-  %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+  %a0 = load i64, ptr @a64, align 8
+  %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+  %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+  %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+  %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+  %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+  %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+  %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+  %b0 = load i64, ptr @b64, align 8
+  %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+  %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+  %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+  %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+  %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+  %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+  %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
   %c0 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a0, i64 %b0)
   %c1 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a1, i64 %b1)
   %c2 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a2, i64 %b2)
@@ -101,51 +101,51 @@ define void @sub_v8i64() {
   %r5 = extractvalue {i64, i1} %c5, 0
   %r6 = extractvalue {i64, i1} %c6, 0
   %r7 = extractvalue {i64, i1} %c7, 0
-  store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-  store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-  store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-  store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-  store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-  store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-  store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-  store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+  store i64 %r0, ptr @c64, align 8
+  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @sub_v16i32() {
 ; CHECK-LABEL: @sub_v16i32(
-; CHECK-NEXT:    [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4
-; CHECK-NEXT:    [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4
-; CHECK-NEXT:    [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4
-; CHECK-NEXT:    [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4
-; CHECK-NEXT:    [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4
-; CHECK-NEXT:    [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4
-; CHECK-NEXT:    [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4
-; CHECK-NEXT:    [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4
-; CHECK-NEXT:    [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4
-; CHECK-NEXT:    [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4
-; CHECK-NEXT:    [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-; CHECK-NEXT:    [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-; CHECK-NEXT:    [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-; CHECK-NEXT:    [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-; CHECK-NEXT:    [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-; CHECK-NEXT:    [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-; CHECK-NEXT:    [[B0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0), align 4
-; CHECK-NEXT:    [[B1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1), align 4
-; CHECK-NEXT:    [[B2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2), align 4
-; CHECK-NEXT:    [[B3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3), align 4
-; CHECK-NEXT:    [[B4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4), align 4
-; CHECK-NEXT:    [[B5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5), align 4
-; CHECK-NEXT:    [[B6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6), align 4
-; CHECK-NEXT:    [[B7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7), align 4
-; CHECK-NEXT:    [[B8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8), align 4
-; CHECK-NEXT:    [[B9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9), align 4
-; CHECK-NEXT:    [[B10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-; CHECK-NEXT:    [[B11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-; CHECK-NEXT:    [[B12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-; CHECK-NEXT:    [[B13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-; CHECK-NEXT:    [[B14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-; CHECK-NEXT:    [[B15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+; CHECK-NEXT:    [[A0:%.*]] = load i32, ptr @a32, align 4
+; CHECK-NEXT:    [[A1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[A2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[A3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3), align 4
+; CHECK-NEXT:    [[A4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; CHECK-NEXT:    [[A5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5), align 4
+; CHECK-NEXT:    [[A6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6), align 4
+; CHECK-NEXT:    [[A7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7), align 4
+; CHECK-NEXT:    [[A8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; CHECK-NEXT:    [[A9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9), align 4
+; CHECK-NEXT:    [[A10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+; CHECK-NEXT:    [[A11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+; CHECK-NEXT:    [[A12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; CHECK-NEXT:    [[A13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+; CHECK-NEXT:    [[A14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+; CHECK-NEXT:    [[A15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+; CHECK-NEXT:    [[B0:%.*]] = load i32, ptr @b32, align 4
+; CHECK-NEXT:    [[B1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[B2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[B3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3), align 4
+; CHECK-NEXT:    [[B4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
+; CHECK-NEXT:    [[B5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5), align 4
+; CHECK-NEXT:    [[B6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6), align 4
+; CHECK-NEXT:    [[B7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7), align 4
+; CHECK-NEXT:    [[B8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
+; CHECK-NEXT:    [[B9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9), align 4
+; CHECK-NEXT:    [[B10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+; CHECK-NEXT:    [[B11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+; CHECK-NEXT:    [[B12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+; CHECK-NEXT:    [[B13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+; CHECK-NEXT:    [[B14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+; CHECK-NEXT:    [[B15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
 ; CHECK-NEXT:    [[C0:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[A0]], i32 [[B0]])
 ; CHECK-NEXT:    [[C1:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[A1]], i32 [[B1]])
 ; CHECK-NEXT:    [[C2:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[A2]], i32 [[B2]])
@@ -178,56 +178,56 @@ define void @sub_v16i32() {
 ; CHECK-NEXT:    [[R13:%.*]] = extractvalue { i32, i1 } [[C13]], 0
 ; CHECK-NEXT:    [[R14:%.*]] = extractvalue { i32, i1 } [[C14]], 0
 ; CHECK-NEXT:    [[R15:%.*]] = extractvalue { i32, i1 } [[C15]], 0
-; CHECK-NEXT:    store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4
-; CHECK-NEXT:    store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4
-; CHECK-NEXT:    store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4
-; CHECK-NEXT:    store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4
-; CHECK-NEXT:    store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4
-; CHECK-NEXT:    store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4
-; CHECK-NEXT:    store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4
-; CHECK-NEXT:    store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4
-; CHECK-NEXT:    store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4
-; CHECK-NEXT:    store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4
-; CHECK-NEXT:    store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-; CHECK-NEXT:    store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-; CHECK-NEXT:    store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-; CHECK-NEXT:    store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-; CHECK-NEXT:    store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-; CHECK-NEXT:    store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+; CHECK-NEXT:    store i32 [[R0]], ptr @c32, align 4
+; CHECK-NEXT:    store i32 [[R1]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1), align 4
+; CHECK-NEXT:    store i32 [[R2]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2), align 4
+; CHECK-NEXT:    store i32 [[R3]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3), align 4
+; CHECK-NEXT:    store i32 [[R4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; CHECK-NEXT:    store i32 [[R5]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5), align 4
+; CHECK-NEXT:    store i32 [[R6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6), align 4
+; CHECK-NEXT:    store i32 [[R7]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7), align 4
+; CHECK-NEXT:    store i32 [[R8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; CHECK-NEXT:    store i32 [[R9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9), align 4
+; CHECK-NEXT:    store i32 [[R10]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+; CHECK-NEXT:    store i32 [[R11]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+; CHECK-NEXT:    store i32 [[R12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+; CHECK-NEXT:    store i32 [[R13]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+; CHECK-NEXT:    store i32 [[R14]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+; CHECK-NEXT:    store i32 [[R15]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
 ; CHECK-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-  %b0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
-  %b1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
-  %b2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
-  %b3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
-  %b4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
-  %b5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
-  %b6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
-  %b7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
-  %b8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
-  %b9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
-  %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-  %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-  %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-  %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-  %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-  %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+  %b0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+  %b1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+  %b2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+  %b3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+  %b4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+  %b5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+  %b6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+  %b7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+  %b8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+  %b9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+  %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+  %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+  %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+  %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+  %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+  %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
   %c0  = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a0 , i32 %b0 )
   %c1  = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a1 , i32 %b1 )
   %c2  = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a2 , i32 %b2 )
@@ -260,91 +260,91 @@ define void @sub_v16i32() {
   %r13 = extractvalue {i32, i1} %c13, 0
   %r14 = extractvalue {i32, i1} %c14, 0
   %r15 = extractvalue {i32, i1} %c15, 0
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @sub_v32i16() {
 ; CHECK-LABEL: @sub_v32i16(
-; CHECK-NEXT:    [[A0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0), align 2
-; CHECK-NEXT:    [[A1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1), align 2
-; CHECK-NEXT:    [[A2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2), align 2
-; CHECK-NEXT:    [[A3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3), align 2
-; CHECK-NEXT:    [[A4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4), align 2
-; CHECK-NEXT:    [[A5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5), align 2
-; CHECK-NEXT:    [[A6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6), align 2
-; CHECK-NEXT:    [[A7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7), align 2
-; CHECK-NEXT:    [[A8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8), align 2
-; CHECK-NEXT:    [[A9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9), align 2
-; CHECK-NEXT:    [[A10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-; CHECK-NEXT:    [[A11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-; CHECK-NEXT:    [[A12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-; CHECK-NEXT:    [[A13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-; CHECK-NEXT:    [[A14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-; CHECK-NEXT:    [[A15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-; CHECK-NEXT:    [[A16:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-; CHECK-NEXT:    [[A17:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-; CHECK-NEXT:    [[A18:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-; CHECK-NEXT:    [[A19:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-; CHECK-NEXT:    [[A20:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-; CHECK-NEXT:    [[A21:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-; CHECK-NEXT:    [[A22:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-; CHECK-NEXT:    [[A23:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-; CHECK-NEXT:    [[A24:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-; CHECK-NEXT:    [[A25:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-; CHECK-NEXT:    [[A26:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-; CHECK-NEXT:    [[A27:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-; CHECK-NEXT:    [[A28:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-; CHECK-NEXT:    [[A29:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-; CHECK-NEXT:    [[A30:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-; CHECK-NEXT:    [[A31:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-; CHECK-NEXT:    [[B0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0), align 2
-; CHECK-NEXT:    [[B1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1), align 2
-; CHECK-NEXT:    [[B2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2), align 2
-; CHECK-NEXT:    [[B3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3), align 2
-; CHECK-NEXT:    [[B4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4), align 2
-; CHECK-NEXT:    [[B5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5), align 2
-; CHECK-NEXT:    [[B6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6), align 2
-; CHECK-NEXT:    [[B7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7), align 2
-; CHECK-NEXT:    [[B8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8), align 2
-; CHECK-NEXT:    [[B9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9), align 2
-; CHECK-NEXT:    [[B10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-; CHECK-NEXT:    [[B11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-; CHECK-NEXT:    [[B12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-; CHECK-NEXT:    [[B13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-; CHECK-NEXT:    [[B14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-; CHECK-NEXT:    [[B15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-; CHECK-NEXT:    [[B16:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-; CHECK-NEXT:    [[B17:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-; CHECK-NEXT:    [[B18:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-; CHECK-NEXT:    [[B19:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-; CHECK-NEXT:    [[B20:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-; CHECK-NEXT:    [[B21:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-; CHECK-NEXT:    [[B22:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-; CHECK-NEXT:    [[B23:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-; CHECK-NEXT:    [[B24:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-; CHECK-NEXT:    [[B25:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-; CHECK-NEXT:    [[B26:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-; CHECK-NEXT:    [[B27:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-; CHECK-NEXT:    [[B28:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-; CHECK-NEXT:    [[B29:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-; CHECK-NEXT:    [[B30:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-; CHECK-NEXT:    [[B31:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+; CHECK-NEXT:    [[A0:%.*]] = load i16, ptr @a16, align 2
+; CHECK-NEXT:    [[A1:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1), align 2
+; CHECK-NEXT:    [[A2:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2), align 2
+; CHECK-NEXT:    [[A3:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3), align 2
+; CHECK-NEXT:    [[A4:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4), align 2
+; CHECK-NEXT:    [[A5:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5), align 2
+; CHECK-NEXT:    [[A6:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6), align 2
+; CHECK-NEXT:    [[A7:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7), align 2
+; CHECK-NEXT:    [[A8:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; CHECK-NEXT:    [[A9:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9), align 2
+; CHECK-NEXT:    [[A10:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+; CHECK-NEXT:    [[A11:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+; CHECK-NEXT:    [[A12:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+; CHECK-NEXT:    [[A13:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+; CHECK-NEXT:    [[A14:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+; CHECK-NEXT:    [[A15:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+; CHECK-NEXT:    [[A16:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; CHECK-NEXT:    [[A17:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+; CHECK-NEXT:    [[A18:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+; CHECK-NEXT:    [[A19:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+; CHECK-NEXT:    [[A20:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+; CHECK-NEXT:    [[A21:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+; CHECK-NEXT:    [[A22:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+; CHECK-NEXT:    [[A23:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+; CHECK-NEXT:    [[A24:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; CHECK-NEXT:    [[A25:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+; CHECK-NEXT:    [[A26:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+; CHECK-NEXT:    [[A27:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+; CHECK-NEXT:    [[A28:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+; CHECK-NEXT:    [[A29:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+; CHECK-NEXT:    [[A30:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+; CHECK-NEXT:    [[A31:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+; CHECK-NEXT:    [[B0:%.*]] = load i16, ptr @b16, align 2
+; CHECK-NEXT:    [[B1:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1), align 2
+; CHECK-NEXT:    [[B2:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2), align 2
+; CHECK-NEXT:    [[B3:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3), align 2
+; CHECK-NEXT:    [[B4:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4), align 2
+; CHECK-NEXT:    [[B5:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5), align 2
+; CHECK-NEXT:    [[B6:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6), align 2
+; CHECK-NEXT:    [[B7:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7), align 2
+; CHECK-NEXT:    [[B8:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
+; CHECK-NEXT:    [[B9:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9), align 2
+; CHECK-NEXT:    [[B10:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+; CHECK-NEXT:    [[B11:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+; CHECK-NEXT:    [[B12:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+; CHECK-NEXT:    [[B13:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+; CHECK-NEXT:    [[B14:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+; CHECK-NEXT:    [[B15:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+; CHECK-NEXT:    [[B16:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+; CHECK-NEXT:    [[B17:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+; CHECK-NEXT:    [[B18:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+; CHECK-NEXT:    [[B19:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+; CHECK-NEXT:    [[B20:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+; CHECK-NEXT:    [[B21:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+; CHECK-NEXT:    [[B22:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+; CHECK-NEXT:    [[B23:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+; CHECK-NEXT:    [[B24:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+; CHECK-NEXT:    [[B25:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+; CHECK-NEXT:    [[B26:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+; CHECK-NEXT:    [[B27:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+; CHECK-NEXT:    [[B28:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+; CHECK-NEXT:    [[B29:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+; CHECK-NEXT:    [[B30:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+; CHECK-NEXT:    [[B31:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
 ; CHECK-NEXT:    [[C0:%.*]] = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 [[A0]], i16 [[B0]])
 ; CHECK-NEXT:    [[C1:%.*]] = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 [[A1]], i16 [[B1]])
 ; CHECK-NEXT:    [[C2:%.*]] = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 [[A2]], i16 [[B2]])
@@ -409,104 +409,104 @@ define void @sub_v32i16() {
 ; CHECK-NEXT:    [[R29:%.*]] = extractvalue { i16, i1 } [[C29]], 0
 ; CHECK-NEXT:    [[R30:%.*]] = extractvalue { i16, i1 } [[C30]], 0
 ; CHECK-NEXT:    [[R31:%.*]] = extractvalue { i16, i1 } [[C31]], 0
-; CHECK-NEXT:    store i16 [[R0]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0), align 2
-; CHECK-NEXT:    store i16 [[R1]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1), align 2
-; CHECK-NEXT:    store i16 [[R2]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2), align 2
-; CHECK-NEXT:    store i16 [[R3]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3), align 2
-; CHECK-NEXT:    store i16 [[R4]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4), align 2
-; CHECK-NEXT:    store i16 [[R5]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5), align 2
-; CHECK-NEXT:    store i16 [[R6]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6), align 2
-; CHECK-NEXT:    store i16 [[R7]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7), align 2
-; CHECK-NEXT:    store i16 [[R8]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8), align 2
-; CHECK-NEXT:    store i16 [[R9]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9), align 2
-; CHECK-NEXT:    store i16 [[R10]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-; CHECK-NEXT:    store i16 [[R11]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-; CHECK-NEXT:    store i16 [[R12]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-; CHECK-NEXT:    store i16 [[R13]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-; CHECK-NEXT:    store i16 [[R14]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-; CHECK-NEXT:    store i16 [[R15]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-; CHECK-NEXT:    store i16 [[R16]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-; CHECK-NEXT:    store i16 [[R17]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-; CHECK-NEXT:    store i16 [[R18]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-; CHECK-NEXT:    store i16 [[R19]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-; CHECK-NEXT:    store i16 [[R20]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-; CHECK-NEXT:    store i16 [[R21]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-; CHECK-NEXT:    store i16 [[R22]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-; CHECK-NEXT:    store i16 [[R23]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-; CHECK-NEXT:    store i16 [[R24]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-; CHECK-NEXT:    store i16 [[R25]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-; CHECK-NEXT:    store i16 [[R26]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-; CHECK-NEXT:    store i16 [[R27]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-; CHECK-NEXT:    store i16 [[R28]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-; CHECK-NEXT:    store i16 [[R29]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-; CHECK-NEXT:    store i16 [[R30]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-; CHECK-NEXT:    store i16 [[R31]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+; CHECK-NEXT:    store i16 [[R0]], ptr @c16, align 2
+; CHECK-NEXT:    store i16 [[R1]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1), align 2
+; CHECK-NEXT:    store i16 [[R2]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2), align 2
+; CHECK-NEXT:    store i16 [[R3]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3), align 2
+; CHECK-NEXT:    store i16 [[R4]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4), align 2
+; CHECK-NEXT:    store i16 [[R5]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5), align 2
+; CHECK-NEXT:    store i16 [[R6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6), align 2
+; CHECK-NEXT:    store i16 [[R7]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7), align 2
+; CHECK-NEXT:    store i16 [[R8]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; CHECK-NEXT:    store i16 [[R9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9), align 2
+; CHECK-NEXT:    store i16 [[R10]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+; CHECK-NEXT:    store i16 [[R11]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+; CHECK-NEXT:    store i16 [[R12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+; CHECK-NEXT:    store i16 [[R13]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+; CHECK-NEXT:    store i16 [[R14]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+; CHECK-NEXT:    store i16 [[R15]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+; CHECK-NEXT:    store i16 [[R16]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; CHECK-NEXT:    store i16 [[R17]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+; CHECK-NEXT:    store i16 [[R18]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+; CHECK-NEXT:    store i16 [[R19]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+; CHECK-NEXT:    store i16 [[R20]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+; CHECK-NEXT:    store i16 [[R21]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+; CHECK-NEXT:    store i16 [[R22]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+; CHECK-NEXT:    store i16 [[R23]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+; CHECK-NEXT:    store i16 [[R24]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+; CHECK-NEXT:    store i16 [[R25]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+; CHECK-NEXT:    store i16 [[R26]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+; CHECK-NEXT:    store i16 [[R27]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+; CHECK-NEXT:    store i16 [[R28]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+; CHECK-NEXT:    store i16 [[R29]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+; CHECK-NEXT:    store i16 [[R30]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+; CHECK-NEXT:    store i16 [[R31]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
 ; CHECK-NEXT:    ret void
 ;
-  %a0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
-  %a1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
-  %a2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
-  %a3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
-  %a4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
-  %a5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
-  %a6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
-  %a7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
-  %a8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
-  %a9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
-  %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-  %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-  %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-  %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-  %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-  %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-  %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-  %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-  %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-  %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-  %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-  %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-  %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-  %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-  %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-  %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-  %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-  %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-  %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-  %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-  %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-  %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-  %b0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
-  %b1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
-  %b2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
-  %b3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
-  %b4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
-  %b5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
-  %b6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
-  %b7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
-  %b8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
-  %b9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
-  %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-  %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-  %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-  %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-  %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-  %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-  %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-  %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-  %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-  %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-  %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-  %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-  %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-  %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-  %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-  %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-  %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-  %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-  %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-  %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-  %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-  %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+  %a0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+  %a1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+  %a2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+  %a3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+  %a4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+  %a5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+  %a6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+  %a7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+  %a8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+  %a9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+  %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+  %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+  %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+  %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+  %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+  %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+  %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+  %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+  %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+  %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+  %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+  %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+  %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+  %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+  %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+  %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+  %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+  %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+  %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+  %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+  %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+  %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+  %b0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+  %b1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+  %b2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+  %b3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+  %b4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+  %b5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+  %b6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+  %b7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+  %b8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+  %b9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+  %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+  %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+  %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+  %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+  %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+  %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+  %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+  %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+  %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+  %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+  %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+  %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+  %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+  %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+  %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+  %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+  %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+  %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+  %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+  %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+  %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+  %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
   %c0  = call {i16, i1} @llvm.usub.with.overflow.i16(i16 %a0 , i16 %b0 )
   %c1  = call {i16, i1} @llvm.usub.with.overflow.i16(i16 %a1 , i16 %b1 )
   %c2  = call {i16, i1} @llvm.usub.with.overflow.i16(i16 %a2 , i16 %b2 )
@@ -571,171 +571,171 @@ define void @sub_v32i16() {
   %r29 = extractvalue {i16, i1} %c29, 0
   %r30 = extractvalue {i16, i1} %c30, 0
   %r31 = extractvalue {i16, i1} %c31, 0
-  store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
-  store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
-  store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
-  store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
-  store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
-  store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
-  store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
-  store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
-  store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
-  store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
-  store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-  store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-  store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-  store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-  store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-  store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-  store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-  store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-  store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-  store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-  store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-  store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-  store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-  store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-  store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-  store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-  store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-  store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-  store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-  store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-  store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-  store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+  store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+  store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+  store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+  store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+  store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+  store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+  store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+  store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+  store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+  store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+  store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+  store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+  store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+  store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+  store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+  store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+  store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+  store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+  store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+  store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+  store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+  store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+  store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+  store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+  store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+  store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+  store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+  store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+  store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+  store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+  store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+  store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
   ret void
 }
 
 define void @sub_v64i8() {
 ; CHECK-LABEL: @sub_v64i8(
-; CHECK-NEXT:    [[A0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0), align 1
-; CHECK-NEXT:    [[A1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1), align 1
-; CHECK-NEXT:    [[A2:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2), align 1
-; CHECK-NEXT:    [[A3:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3), align 1
-; CHECK-NEXT:    [[A4:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4), align 1
-; CHECK-NEXT:    [[A5:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5), align 1
-; CHECK-NEXT:    [[A6:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6), align 1
-; CHECK-NEXT:    [[A7:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7), align 1
-; CHECK-NEXT:    [[A8:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8), align 1
-; CHECK-NEXT:    [[A9:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9), align 1
-; CHECK-NEXT:    [[A10:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-; CHECK-NEXT:    [[A11:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-; CHECK-NEXT:    [[A12:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-; CHECK-NEXT:    [[A13:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-; CHECK-NEXT:    [[A14:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-; CHECK-NEXT:    [[A15:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-; CHECK-NEXT:    [[A16:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-; CHECK-NEXT:    [[A17:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-; CHECK-NEXT:    [[A18:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-; CHECK-NEXT:    [[A19:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-; CHECK-NEXT:    [[A20:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-; CHECK-NEXT:    [[A21:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-; CHECK-NEXT:    [[A22:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-; CHECK-NEXT:    [[A23:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-; CHECK-NEXT:    [[A24:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-; CHECK-NEXT:    [[A25:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-; CHECK-NEXT:    [[A26:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-; CHECK-NEXT:    [[A27:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-; CHECK-NEXT:    [[A28:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-; CHECK-NEXT:    [[A29:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-; CHECK-NEXT:    [[A30:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-; CHECK-NEXT:    [[A31:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-; CHECK-NEXT:    [[A32:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-; CHECK-NEXT:    [[A33:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-; CHECK-NEXT:    [[A34:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-; CHECK-NEXT:    [[A35:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-; CHECK-NEXT:    [[A36:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-; CHECK-NEXT:    [[A37:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-; CHECK-NEXT:    [[A38:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-; CHECK-NEXT:    [[A39:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-; CHECK-NEXT:    [[A40:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-; CHECK-NEXT:    [[A41:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-; CHECK-NEXT:    [[A42:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-; CHECK-NEXT:    [[A43:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-; CHECK-NEXT:    [[A44:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-; CHECK-NEXT:    [[A45:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-; CHECK-NEXT:    [[A46:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-; CHECK-NEXT:    [[A47:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-; CHECK-NEXT:    [[A48:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-; CHECK-NEXT:    [[A49:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-; CHECK-NEXT:    [[A50:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-; CHECK-NEXT:    [[A51:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-; CHECK-NEXT:    [[A52:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-; CHECK-NEXT:    [[A53:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-; CHECK-NEXT:    [[A54:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-; CHECK-NEXT:    [[A55:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-; CHECK-NEXT:    [[A56:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-; CHECK-NEXT:    [[A57:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-; CHECK-NEXT:    [[A58:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-; CHECK-NEXT:    [[A59:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-; CHECK-NEXT:    [[A60:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-; CHECK-NEXT:    [[A61:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-; CHECK-NEXT:    [[A62:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-; CHECK-NEXT:    [[A63:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-; CHECK-NEXT:    [[B0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0), align 1
-; CHECK-NEXT:    [[B1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1), align 1
-; CHECK-NEXT:    [[B2:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2), align 1
-; CHECK-NEXT:    [[B3:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3), align 1
-; CHECK-NEXT:    [[B4:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4), align 1
-; CHECK-NEXT:    [[B5:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5), align 1
-; CHECK-NEXT:    [[B6:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6), align 1
-; CHECK-NEXT:    [[B7:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7), align 1
-; CHECK-NEXT:    [[B8:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8), align 1
-; CHECK-NEXT:    [[B9:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9), align 1
-; CHECK-NEXT:    [[B10:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-; CHECK-NEXT:    [[B11:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-; CHECK-NEXT:    [[B12:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-; CHECK-NEXT:    [[B13:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-; CHECK-NEXT:    [[B14:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-; CHECK-NEXT:    [[B15:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-; CHECK-NEXT:    [[B16:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-; CHECK-NEXT:    [[B17:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-; CHECK-NEXT:    [[B18:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-; CHECK-NEXT:    [[B19:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-; CHECK-NEXT:    [[B20:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-; CHECK-NEXT:    [[B21:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-; CHECK-NEXT:    [[B22:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-; CHECK-NEXT:    [[B23:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-; CHECK-NEXT:    [[B24:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-; CHECK-NEXT:    [[B25:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-; CHECK-NEXT:    [[B26:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-; CHECK-NEXT:    [[B27:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-; CHECK-NEXT:    [[B28:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-; CHECK-NEXT:    [[B29:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-; CHECK-NEXT:    [[B30:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-; CHECK-NEXT:    [[B31:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-; CHECK-NEXT:    [[B32:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-; CHECK-NEXT:    [[B33:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-; CHECK-NEXT:    [[B34:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-; CHECK-NEXT:    [[B35:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-; CHECK-NEXT:    [[B36:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-; CHECK-NEXT:    [[B37:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-; CHECK-NEXT:    [[B38:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-; CHECK-NEXT:    [[B39:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-; CHECK-NEXT:    [[B40:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-; CHECK-NEXT:    [[B41:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-; CHECK-NEXT:    [[B42:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-; CHECK-NEXT:    [[B43:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-; CHECK-NEXT:    [[B44:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-; CHECK-NEXT:    [[B45:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-; CHECK-NEXT:    [[B46:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-; CHECK-NEXT:    [[B47:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-; CHECK-NEXT:    [[B48:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-; CHECK-NEXT:    [[B49:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-; CHECK-NEXT:    [[B50:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-; CHECK-NEXT:    [[B51:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-; CHECK-NEXT:    [[B52:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-; CHECK-NEXT:    [[B53:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-; CHECK-NEXT:    [[B54:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-; CHECK-NEXT:    [[B55:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-; CHECK-NEXT:    [[B56:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-; CHECK-NEXT:    [[B57:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-; CHECK-NEXT:    [[B58:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-; CHECK-NEXT:    [[B59:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-; CHECK-NEXT:    [[B60:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-; CHECK-NEXT:    [[B61:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-; CHECK-NEXT:    [[B62:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-; CHECK-NEXT:    [[B63:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+; CHECK-NEXT:    [[A0:%.*]] = load i8, ptr @a8, align 1
+; CHECK-NEXT:    [[A1:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1), align 1
+; CHECK-NEXT:    [[A2:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2), align 1
+; CHECK-NEXT:    [[A3:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3), align 1
+; CHECK-NEXT:    [[A4:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4), align 1
+; CHECK-NEXT:    [[A5:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5), align 1
+; CHECK-NEXT:    [[A6:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6), align 1
+; CHECK-NEXT:    [[A7:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7), align 1
+; CHECK-NEXT:    [[A8:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8), align 1
+; CHECK-NEXT:    [[A9:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9), align 1
+; CHECK-NEXT:    [[A10:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+; CHECK-NEXT:    [[A11:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+; CHECK-NEXT:    [[A12:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+; CHECK-NEXT:    [[A13:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+; CHECK-NEXT:    [[A14:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+; CHECK-NEXT:    [[A15:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+; CHECK-NEXT:    [[A16:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; CHECK-NEXT:    [[A17:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+; CHECK-NEXT:    [[A18:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+; CHECK-NEXT:    [[A19:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+; CHECK-NEXT:    [[A20:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+; CHECK-NEXT:    [[A21:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+; CHECK-NEXT:    [[A22:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+; CHECK-NEXT:    [[A23:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+; CHECK-NEXT:    [[A24:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+; CHECK-NEXT:    [[A25:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+; CHECK-NEXT:    [[A26:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+; CHECK-NEXT:    [[A27:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+; CHECK-NEXT:    [[A28:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+; CHECK-NEXT:    [[A29:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+; CHECK-NEXT:    [[A30:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+; CHECK-NEXT:    [[A31:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+; CHECK-NEXT:    [[A32:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; CHECK-NEXT:    [[A33:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+; CHECK-NEXT:    [[A34:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+; CHECK-NEXT:    [[A35:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+; CHECK-NEXT:    [[A36:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+; CHECK-NEXT:    [[A37:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+; CHECK-NEXT:    [[A38:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+; CHECK-NEXT:    [[A39:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+; CHECK-NEXT:    [[A40:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+; CHECK-NEXT:    [[A41:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+; CHECK-NEXT:    [[A42:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+; CHECK-NEXT:    [[A43:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+; CHECK-NEXT:    [[A44:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+; CHECK-NEXT:    [[A45:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+; CHECK-NEXT:    [[A46:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+; CHECK-NEXT:    [[A47:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+; CHECK-NEXT:    [[A48:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; CHECK-NEXT:    [[A49:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+; CHECK-NEXT:    [[A50:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+; CHECK-NEXT:    [[A51:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+; CHECK-NEXT:    [[A52:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+; CHECK-NEXT:    [[A53:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+; CHECK-NEXT:    [[A54:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+; CHECK-NEXT:    [[A55:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+; CHECK-NEXT:    [[A56:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+; CHECK-NEXT:    [[A57:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+; CHECK-NEXT:    [[A58:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+; CHECK-NEXT:    [[A59:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+; CHECK-NEXT:    [[A60:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+; CHECK-NEXT:    [[A61:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+; CHECK-NEXT:    [[A62:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+; CHECK-NEXT:    [[A63:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+; CHECK-NEXT:    [[B0:%.*]] = load i8, ptr @b8, align 1
+; CHECK-NEXT:    [[B1:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1), align 1
+; CHECK-NEXT:    [[B2:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2), align 1
+; CHECK-NEXT:    [[B3:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3), align 1
+; CHECK-NEXT:    [[B4:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4), align 1
+; CHECK-NEXT:    [[B5:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5), align 1
+; CHECK-NEXT:    [[B6:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6), align 1
+; CHECK-NEXT:    [[B7:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7), align 1
+; CHECK-NEXT:    [[B8:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8), align 1
+; CHECK-NEXT:    [[B9:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9), align 1
+; CHECK-NEXT:    [[B10:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+; CHECK-NEXT:    [[B11:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+; CHECK-NEXT:    [[B12:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+; CHECK-NEXT:    [[B13:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+; CHECK-NEXT:    [[B14:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+; CHECK-NEXT:    [[B15:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+; CHECK-NEXT:    [[B16:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+; CHECK-NEXT:    [[B17:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+; CHECK-NEXT:    [[B18:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+; CHECK-NEXT:    [[B19:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+; CHECK-NEXT:    [[B20:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+; CHECK-NEXT:    [[B21:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+; CHECK-NEXT:    [[B22:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+; CHECK-NEXT:    [[B23:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+; CHECK-NEXT:    [[B24:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+; CHECK-NEXT:    [[B25:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+; CHECK-NEXT:    [[B26:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+; CHECK-NEXT:    [[B27:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+; CHECK-NEXT:    [[B28:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+; CHECK-NEXT:    [[B29:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+; CHECK-NEXT:    [[B30:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+; CHECK-NEXT:    [[B31:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+; CHECK-NEXT:    [[B32:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+; CHECK-NEXT:    [[B33:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+; CHECK-NEXT:    [[B34:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+; CHECK-NEXT:    [[B35:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+; CHECK-NEXT:    [[B36:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+; CHECK-NEXT:    [[B37:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+; CHECK-NEXT:    [[B38:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+; CHECK-NEXT:    [[B39:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+; CHECK-NEXT:    [[B40:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+; CHECK-NEXT:    [[B41:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+; CHECK-NEXT:    [[B42:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+; CHECK-NEXT:    [[B43:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+; CHECK-NEXT:    [[B44:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+; CHECK-NEXT:    [[B45:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+; CHECK-NEXT:    [[B46:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+; CHECK-NEXT:    [[B47:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+; CHECK-NEXT:    [[B48:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+; CHECK-NEXT:    [[B49:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+; CHECK-NEXT:    [[B50:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+; CHECK-NEXT:    [[B51:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+; CHECK-NEXT:    [[B52:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+; CHECK-NEXT:    [[B53:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+; CHECK-NEXT:    [[B54:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+; CHECK-NEXT:    [[B55:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+; CHECK-NEXT:    [[B56:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+; CHECK-NEXT:    [[B57:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+; CHECK-NEXT:    [[B58:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+; CHECK-NEXT:    [[B59:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+; CHECK-NEXT:    [[B60:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+; CHECK-NEXT:    [[B61:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+; CHECK-NEXT:    [[B62:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+; CHECK-NEXT:    [[B63:%.*]] = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
 ; CHECK-NEXT:    [[C0:%.*]] = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[A0]], i8 [[B0]])
 ; CHECK-NEXT:    [[C1:%.*]] = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[A1]], i8 [[B1]])
 ; CHECK-NEXT:    [[C2:%.*]] = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[A2]], i8 [[B2]])
@@ -864,200 +864,200 @@ define void @sub_v64i8() {
 ; CHECK-NEXT:    [[R61:%.*]] = extractvalue { i8, i1 } [[C61]], 0
 ; CHECK-NEXT:    [[R62:%.*]] = extractvalue { i8, i1 } [[C62]], 0
 ; CHECK-NEXT:    [[R63:%.*]] = extractvalue { i8, i1 } [[C63]], 0
-; CHECK-NEXT:    store i8 [[R0]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0), align 1
-; CHECK-NEXT:    store i8 [[R1]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1), align 1
-; CHECK-NEXT:    store i8 [[R2]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2), align 1
-; CHECK-NEXT:    store i8 [[R3]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3), align 1
-; CHECK-NEXT:    store i8 [[R4]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4), align 1
-; CHECK-NEXT:    store i8 [[R5]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5), align 1
-; CHECK-NEXT:    store i8 [[R6]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6), align 1
-; CHECK-NEXT:    store i8 [[R7]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7), align 1
-; CHECK-NEXT:    store i8 [[R8]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8), align 1
-; CHECK-NEXT:    store i8 [[R9]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9), align 1
-; CHECK-NEXT:    store i8 [[R10]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-; CHECK-NEXT:    store i8 [[R11]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-; CHECK-NEXT:    store i8 [[R12]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-; CHECK-NEXT:    store i8 [[R13]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-; CHECK-NEXT:    store i8 [[R14]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-; CHECK-NEXT:    store i8 [[R15]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-; CHECK-NEXT:    store i8 [[R16]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-; CHECK-NEXT:    store i8 [[R17]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-; CHECK-NEXT:    store i8 [[R18]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-; CHECK-NEXT:    store i8 [[R19]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-; CHECK-NEXT:    store i8 [[R20]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-; CHECK-NEXT:    store i8 [[R21]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-; CHECK-NEXT:    store i8 [[R22]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-; CHECK-NEXT:    store i8 [[R23]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-; CHECK-NEXT:    store i8 [[R24]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-; CHECK-NEXT:    store i8 [[R25]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-; CHECK-NEXT:    store i8 [[R26]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-; CHECK-NEXT:    store i8 [[R27]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-; CHECK-NEXT:    store i8 [[R28]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-; CHECK-NEXT:    store i8 [[R29]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-; CHECK-NEXT:    store i8 [[R30]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-; CHECK-NEXT:    store i8 [[R31]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-; CHECK-NEXT:    store i8 [[R32]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-; CHECK-NEXT:    store i8 [[R33]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-; CHECK-NEXT:    store i8 [[R34]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-; CHECK-NEXT:    store i8 [[R35]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-; CHECK-NEXT:    store i8 [[R36]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-; CHECK-NEXT:    store i8 [[R37]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-; CHECK-NEXT:    store i8 [[R38]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-; CHECK-NEXT:    store i8 [[R39]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-; CHECK-NEXT:    store i8 [[R40]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-; CHECK-NEXT:    store i8 [[R41]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-; CHECK-NEXT:    store i8 [[R42]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-; CHECK-NEXT:    store i8 [[R43]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-; CHECK-NEXT:    store i8 [[R44]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-; CHECK-NEXT:    store i8 [[R45]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-; CHECK-NEXT:    store i8 [[R46]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-; CHECK-NEXT:    store i8 [[R47]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-; CHECK-NEXT:    store i8 [[R48]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-; CHECK-NEXT:    store i8 [[R49]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-; CHECK-NEXT:    store i8 [[R50]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-; CHECK-NEXT:    store i8 [[R51]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-; CHECK-NEXT:    store i8 [[R52]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-; CHECK-NEXT:    store i8 [[R53]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-; CHECK-NEXT:    store i8 [[R54]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-; CHECK-NEXT:    store i8 [[R55]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-; CHECK-NEXT:    store i8 [[R56]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-; CHECK-NEXT:    store i8 [[R57]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-; CHECK-NEXT:    store i8 [[R58]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-; CHECK-NEXT:    store i8 [[R59]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-; CHECK-NEXT:    store i8 [[R60]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-; CHECK-NEXT:    store i8 [[R61]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-; CHECK-NEXT:    store i8 [[R62]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-; CHECK-NEXT:    store i8 [[R63]], i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+; CHECK-NEXT:    store i8 [[R0]], ptr @c8, align 1
+; CHECK-NEXT:    store i8 [[R1]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1), align 1
+; CHECK-NEXT:    store i8 [[R2]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2), align 1
+; CHECK-NEXT:    store i8 [[R3]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3), align 1
+; CHECK-NEXT:    store i8 [[R4]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4), align 1
+; CHECK-NEXT:    store i8 [[R5]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5), align 1
+; CHECK-NEXT:    store i8 [[R6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6), align 1
+; CHECK-NEXT:    store i8 [[R7]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7), align 1
+; CHECK-NEXT:    store i8 [[R8]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8), align 1
+; CHECK-NEXT:    store i8 [[R9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9), align 1
+; CHECK-NEXT:    store i8 [[R10]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+; CHECK-NEXT:    store i8 [[R11]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+; CHECK-NEXT:    store i8 [[R12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+; CHECK-NEXT:    store i8 [[R13]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+; CHECK-NEXT:    store i8 [[R14]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+; CHECK-NEXT:    store i8 [[R15]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+; CHECK-NEXT:    store i8 [[R16]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; CHECK-NEXT:    store i8 [[R17]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+; CHECK-NEXT:    store i8 [[R18]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+; CHECK-NEXT:    store i8 [[R19]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+; CHECK-NEXT:    store i8 [[R20]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+; CHECK-NEXT:    store i8 [[R21]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+; CHECK-NEXT:    store i8 [[R22]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+; CHECK-NEXT:    store i8 [[R23]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+; CHECK-NEXT:    store i8 [[R24]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+; CHECK-NEXT:    store i8 [[R25]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+; CHECK-NEXT:    store i8 [[R26]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+; CHECK-NEXT:    store i8 [[R27]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+; CHECK-NEXT:    store i8 [[R28]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+; CHECK-NEXT:    store i8 [[R29]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+; CHECK-NEXT:    store i8 [[R30]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+; CHECK-NEXT:    store i8 [[R31]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+; CHECK-NEXT:    store i8 [[R32]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; CHECK-NEXT:    store i8 [[R33]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+; CHECK-NEXT:    store i8 [[R34]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+; CHECK-NEXT:    store i8 [[R35]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+; CHECK-NEXT:    store i8 [[R36]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+; CHECK-NEXT:    store i8 [[R37]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+; CHECK-NEXT:    store i8 [[R38]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+; CHECK-NEXT:    store i8 [[R39]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+; CHECK-NEXT:    store i8 [[R40]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+; CHECK-NEXT:    store i8 [[R41]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+; CHECK-NEXT:    store i8 [[R42]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+; CHECK-NEXT:    store i8 [[R43]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+; CHECK-NEXT:    store i8 [[R44]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+; CHECK-NEXT:    store i8 [[R45]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+; CHECK-NEXT:    store i8 [[R46]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+; CHECK-NEXT:    store i8 [[R47]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+; CHECK-NEXT:    store i8 [[R48]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+; CHECK-NEXT:    store i8 [[R49]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+; CHECK-NEXT:    store i8 [[R50]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+; CHECK-NEXT:    store i8 [[R51]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+; CHECK-NEXT:    store i8 [[R52]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+; CHECK-NEXT:    store i8 [[R53]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+; CHECK-NEXT:    store i8 [[R54]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+; CHECK-NEXT:    store i8 [[R55]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+; CHECK-NEXT:    store i8 [[R56]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+; CHECK-NEXT:    store i8 [[R57]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+; CHECK-NEXT:    store i8 [[R58]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+; CHECK-NEXT:    store i8 [[R59]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+; CHECK-NEXT:    store i8 [[R60]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+; CHECK-NEXT:    store i8 [[R61]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+; CHECK-NEXT:    store i8 [[R62]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+; CHECK-NEXT:    store i8 [[R63]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
 ; CHECK-NEXT:    ret void
 ;
-  %a0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
-  %a1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
-  %a2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
-  %a3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
-  %a4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
-  %a5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
-  %a6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
-  %a7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
-  %a8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
-  %a9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
-  %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-  %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-  %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-  %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-  %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-  %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-  %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-  %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-  %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-  %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-  %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-  %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-  %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-  %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-  %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-  %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-  %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-  %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-  %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-  %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-  %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-  %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-  %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-  %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-  %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-  %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-  %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-  %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-  %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-  %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-  %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-  %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-  %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-  %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-  %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-  %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-  %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-  %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-  %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-  %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-  %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-  %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-  %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-  %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-  %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-  %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-  %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-  %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-  %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-  %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-  %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-  %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-  %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-  %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-  %b0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
-  %b1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
-  %b2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
-  %b3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
-  %b4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
-  %b5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
-  %b6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
-  %b7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
-  %b8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
-  %b9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
-  %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-  %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-  %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-  %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-  %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-  %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-  %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-  %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-  %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-  %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-  %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-  %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-  %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-  %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-  %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-  %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-  %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-  %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-  %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-  %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-  %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-  %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-  %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-  %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-  %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-  %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-  %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-  %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-  %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-  %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-  %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-  %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-  %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-  %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-  %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-  %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-  %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-  %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-  %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-  %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-  %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-  %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-  %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-  %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-  %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-  %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-  %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-  %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-  %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-  %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-  %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-  %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-  %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-  %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+  %a0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+  %a1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+  %a2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+  %a3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+  %a4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+  %a5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+  %a6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+  %a7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+  %a8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+  %a9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+  %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+  %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+  %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+  %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+  %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+  %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+  %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+  %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+  %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+  %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+  %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+  %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+  %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+  %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+  %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+  %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+  %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+  %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+  %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+  %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+  %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+  %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+  %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+  %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+  %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+  %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+  %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+  %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+  %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+  %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+  %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+  %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+  %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+  %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+  %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+  %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+  %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+  %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+  %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+  %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+  %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+  %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+  %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+  %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+  %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+  %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+  %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+  %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+  %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+  %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+  %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+  %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+  %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+  %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+  %b0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+  %b1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+  %b2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+  %b3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+  %b4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+  %b5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+  %b6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+  %b7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+  %b8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+  %b9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+  %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+  %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+  %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+  %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+  %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+  %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+  %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+  %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+  %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+  %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+  %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+  %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+  %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+  %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+  %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+  %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+  %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+  %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+  %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+  %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+  %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+  %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+  %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+  %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+  %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+  %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+  %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+  %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+  %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+  %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+  %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+  %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+  %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+  %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+  %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+  %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+  %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+  %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+  %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+  %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+  %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+  %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+  %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+  %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+  %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+  %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+  %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+  %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+  %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+  %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+  %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+  %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+  %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+  %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
   %c0  = call {i8, i1} @llvm.usub.with.overflow.i8(i8 %a0 , i8 %b0 )
   %c1  = call {i8, i1} @llvm.usub.with.overflow.i8(i8 %a1 , i8 %b1 )
   %c2  = call {i8, i1} @llvm.usub.with.overflow.i8(i8 %a2 , i8 %b2 )
@@ -1186,69 +1186,69 @@ define void @sub_v64i8() {
   %r61 = extractvalue {i8, i1} %c61, 0
   %r62 = extractvalue {i8, i1} %c62, 0
   %r63 = extractvalue {i8, i1} %c63, 0
-  store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
-  store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
-  store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
-  store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
-  store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
-  store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
-  store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
-  store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
-  store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
-  store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
-  store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-  store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-  store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-  store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-  store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-  store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-  store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-  store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-  store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-  store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-  store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-  store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-  store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-  store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-  store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-  store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-  store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-  store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-  store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-  store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-  store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-  store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-  store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-  store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-  store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-  store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-  store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-  store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-  store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-  store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-  store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-  store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-  store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-  store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-  store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-  store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-  store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-  store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-  store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-  store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-  store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-  store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-  store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-  store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-  store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-  store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-  store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-  store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-  store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-  store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-  store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-  store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-  store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-  store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+  store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+  store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+  store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+  store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+  store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+  store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+  store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+  store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+  store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+  store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+  store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+  store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+  store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+  store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+  store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+  store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+  store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+  store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+  store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+  store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+  store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+  store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+  store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+  store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+  store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+  store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+  store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+  store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+  store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+  store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+  store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+  store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+  store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+  store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+  store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+  store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+  store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+  store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+  store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+  store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+  store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+  store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+  store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+  store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+  store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+  store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+  store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+  store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+  store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+  store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+  store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+  store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+  store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+  store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+  store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+  store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+  store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+  store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+  store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+  store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+  store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+  store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+  store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+  store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
   ret void
 }

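(Editorial aside, a minimal sketch of the rewrite these hunks apply: under opaque pointers, every typed pointer type such as `i8*` or `[64 x i8]*` becomes the single `ptr` type, and pointer `bitcast` constant expressions become redundant and are dropped. The global `@g` below is hypothetical, chosen only to mirror the accesses above; it is not a name from these tests.

  ; typed-pointer form (old):
  ;   %v = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @g, i32 0, i64 1), align 1
  ; opaque-pointer form (new):
  ;   %v = load i8, ptr getelementptr inbounds ([64 x i8], ptr @g, i32 0, i64 1), align 1

When the access starts at offset 0, the whole `bitcast`/GEP wrapper collapses to the bare global — which is why the vector loads in the next file's hunks become plain `load <2 x i64>, ptr @a64`.)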
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub.ll
index df302b9db42de..be54c1e04ca39 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub.ll
@@ -24,77 +24,77 @@
 
 define void @sub_v8i64() {
 ; SSE-LABEL: @sub_v8i64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
+; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
 ; SSE-NEXT:    [[TMP3:%.*]] = sub <2 x i64> [[TMP1]], [[TMP2]]
-; SSE-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP3]], ptr @c64, align 8
+; SSE-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP6:%.*]] = sub <2 x i64> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; SSE-NEXT:    [[TMP9:%.*]] = sub <2 x i64> [[TMP7]], [[TMP8]]
-; SSE-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP11:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
 ; SSE-NEXT:    [[TMP12:%.*]] = sub <2 x i64> [[TMP10]], [[TMP11]]
-; SSE-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @sub_v8i64(
-; SLM-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
+; SLM-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
+; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
 ; SLM-NEXT:    [[TMP3:%.*]] = sub <2 x i64> [[TMP1]], [[TMP2]]
-; SLM-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
+; SLM-NEXT:    store <2 x i64> [[TMP3]], ptr @c64, align 8
+; SLM-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SLM-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
 ; SLM-NEXT:    [[TMP6:%.*]] = sub <2 x i64> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
+; SLM-NEXT:    store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SLM-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SLM-NEXT:    [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; SLM-NEXT:    [[TMP9:%.*]] = sub <2 x i64> [[TMP7]], [[TMP8]]
-; SLM-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
-; SLM-NEXT:    [[TMP11:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
+; SLM-NEXT:    store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SLM-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SLM-NEXT:    [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
 ; SLM-NEXT:    [[TMP12:%.*]] = sub <2 x i64> [[TMP10]], [[TMP11]]
-; SLM-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
+; SLM-NEXT:    store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @sub_v8i64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
 ; AVX-NEXT:    [[TMP3:%.*]] = sub <4 x i64> [[TMP1]], [[TMP2]]
-; AVX-NEXT:    store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP3]], ptr @c64, align 8
+; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX-NEXT:    [[TMP6:%.*]] = sub <4 x i64> [[TMP4]], [[TMP5]]
-; AVX-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sub_v8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
+; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
 ; AVX512-NEXT:    [[TMP3:%.*]] = sub <8 x i64> [[TMP1]], [[TMP2]]
-; AVX512-NEXT:    store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
+; AVX512-NEXT:    store <8 x i64> [[TMP3]], ptr @c64, align 8
 ; AVX512-NEXT:    ret void
 ;
-  %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-  %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-  %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-  %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-  %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-  %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-  %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-  %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-  %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-  %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-  %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-  %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-  %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-  %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-  %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-  %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+  %a0 = load i64, ptr @a64, align 8
+  %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+  %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+  %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+  %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+  %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+  %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+  %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+  %b0 = load i64, ptr @b64, align 8
+  %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+  %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+  %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+  %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+  %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+  %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+  %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
   %r0 = sub i64 %a0, %b0
   %r1 = sub i64 %a1, %b1
   %r2 = sub i64 %a2, %b2
@@ -103,106 +103,106 @@ define void @sub_v8i64() {
   %r5 = sub i64 %a5, %b5
   %r6 = sub i64 %a6, %b6
   %r7 = sub i64 %a7, %b7
-  store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-  store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-  store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-  store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-  store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-  store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-  store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-  store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+  store i64 %r0, ptr @c64, align 8
+  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @sub_v16i32() {
 ; SSE-LABEL: @sub_v16i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
-; SSE-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP3]], ptr @c32, align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = sub <4 x i32> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP9:%.*]] = sub <4 x i32> [[TMP7]], [[TMP8]]
-; SSE-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP12:%.*]] = sub <4 x i32> [[TMP10]], [[TMP11]]
-; SSE-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @sub_v16i32(
-; SLM-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
+; SLM-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
+; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
 ; SLM-NEXT:    [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
-; SLM-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP3]], ptr @c32, align 4
+; SLM-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SLM-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
 ; SLM-NEXT:    [[TMP6:%.*]] = sub <4 x i32> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SLM-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SLM-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; SLM-NEXT:    [[TMP9:%.*]] = sub <4 x i32> [[TMP7]], [[TMP8]]
-; SLM-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
-; SLM-NEXT:    [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SLM-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SLM-NEXT:    [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
 ; SLM-NEXT:    [[TMP12:%.*]] = sub <4 x i32> [[TMP10]], [[TMP11]]
-; SLM-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SLM-NEXT:    store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @sub_v16i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
 ; AVX-NEXT:    [[TMP3:%.*]] = sub <8 x i32> [[TMP1]], [[TMP2]]
-; AVX-NEXT:    store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP3]], ptr @c32, align 4
+; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; AVX-NEXT:    [[TMP6:%.*]] = sub <8 x i32> [[TMP4]], [[TMP5]]
-; AVX-NEXT:    store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sub_v16i32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @b32 to <16 x i32>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
 ; AVX512-NEXT:    [[TMP3:%.*]] = sub <16 x i32> [[TMP1]], [[TMP2]]
-; AVX512-NEXT:    store <16 x i32> [[TMP3]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4
+; AVX512-NEXT:    store <16 x i32> [[TMP3]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-  %b0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
-  %b1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
-  %b2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
-  %b3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
-  %b4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
-  %b5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
-  %b6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
-  %b7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
-  %b8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
-  %b9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
-  %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-  %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-  %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-  %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-  %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-  %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+  %b0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+  %b1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+  %b2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+  %b3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+  %b4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+  %b5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+  %b6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+  %b7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+  %b8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+  %b9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+  %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+  %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+  %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+  %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+  %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+  %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
   %r0  = sub i32 %a0 , %b0
   %r1  = sub i32 %a1 , %b1
   %r2  = sub i32 %a2 , %b2
@@ -219,146 +219,146 @@ define void @sub_v16i32() {
   %r13 = sub i32 %a13, %b13
   %r14 = sub i32 %a14, %b14
   %r15 = sub i32 %a15, %b15
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @sub_v32i16() {
 ; SSE-LABEL: @sub_v32i16(
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
 ; SSE-NEXT:    [[TMP3:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]]
-; SSE-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
 ; SSE-NEXT:    [[TMP6:%.*]] = sub <8 x i16> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; SSE-NEXT:    [[TMP9:%.*]] = sub <8 x i16> [[TMP7]], [[TMP8]]
-; SSE-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
 ; SSE-NEXT:    [[TMP12:%.*]] = sub <8 x i16> [[TMP10]], [[TMP11]]
-; SSE-NEXT:    store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @sub_v32i16(
-; SLM-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SLM-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
 ; SLM-NEXT:    [[TMP3:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]]
-; SLM-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SLM-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SLM-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
 ; SLM-NEXT:    [[TMP6:%.*]] = sub <8 x i16> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SLM-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SLM-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; SLM-NEXT:    [[TMP9:%.*]] = sub <8 x i16> [[TMP7]], [[TMP8]]
-; SLM-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
-; SLM-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SLM-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SLM-NEXT:    [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
 ; SLM-NEXT:    [[TMP12:%.*]] = sub <8 x i16> [[TMP10]], [[TMP11]]
-; SLM-NEXT:    store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SLM-NEXT:    store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @sub_v32i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
+; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
 ; AVX-NEXT:    [[TMP3:%.*]] = sub <16 x i16> [[TMP1]], [[TMP2]]
-; AVX-NEXT:    store <16 x i16> [[TMP3]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP3]], ptr @c16, align 2
+; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; AVX-NEXT:    [[TMP6:%.*]] = sub <16 x i16> [[TMP4]], [[TMP5]]
-; AVX-NEXT:    store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sub_v32i16(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @a16 to <32 x i16>*), align 2
-; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @b16 to <32 x i16>*), align 2
+; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
+; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
 ; AVX512-NEXT:    [[TMP3:%.*]] = sub <32 x i16> [[TMP1]], [[TMP2]]
-; AVX512-NEXT:    store <32 x i16> [[TMP3]], <32 x i16>* bitcast ([32 x i16]* @c16 to <32 x i16>*), align 2
+; AVX512-NEXT:    store <32 x i16> [[TMP3]], ptr @c16, align 2
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
-  %a1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
-  %a2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
-  %a3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
-  %a4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
-  %a5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
-  %a6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
-  %a7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
-  %a8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
-  %a9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
-  %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-  %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-  %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-  %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-  %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-  %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-  %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-  %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-  %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-  %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-  %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-  %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-  %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-  %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-  %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-  %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-  %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-  %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-  %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-  %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-  %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-  %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-  %b0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
-  %b1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
-  %b2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
-  %b3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
-  %b4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
-  %b5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
-  %b6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
-  %b7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
-  %b8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
-  %b9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
-  %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-  %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-  %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-  %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-  %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-  %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-  %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-  %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-  %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-  %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-  %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-  %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-  %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-  %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-  %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-  %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-  %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-  %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-  %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-  %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-  %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-  %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+  %a0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+  %a1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+  %a2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+  %a3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+  %a4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+  %a5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+  %a6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+  %a7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+  %a8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+  %a9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+  %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+  %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+  %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+  %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+  %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+  %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+  %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+  %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+  %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+  %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+  %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+  %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+  %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+  %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+  %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+  %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+  %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+  %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+  %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+  %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+  %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+  %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+  %b0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+  %b1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+  %b2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+  %b3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+  %b4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+  %b5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+  %b6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+  %b7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+  %b8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+  %b9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+  %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+  %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+  %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+  %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+  %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+  %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+  %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+  %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+  %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+  %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+  %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+  %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+  %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+  %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+  %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+  %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+  %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+  %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+  %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+  %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+  %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+  %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
   %r0  = sub i16 %a0 , %b0
   %r1  = sub i16 %a1 , %b1
   %r2  = sub i16 %a2 , %b2
@@ -391,226 +391,226 @@ define void @sub_v32i16() {
   %r29 = sub i16 %a29, %b29
   %r30 = sub i16 %a30, %b30
   %r31 = sub i16 %a31, %b31
-  store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
-  store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
-  store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
-  store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
-  store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
-  store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
-  store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
-  store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
-  store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
-  store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
-  store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-  store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-  store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-  store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-  store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-  store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-  store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-  store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-  store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-  store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-  store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-  store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-  store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-  store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-  store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-  store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-  store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-  store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-  store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-  store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-  store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-  store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+  store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+  store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+  store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+  store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+  store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+  store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+  store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+  store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+  store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+  store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+  store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+  store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+  store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+  store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+  store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+  store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+  store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+  store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+  store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+  store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+  store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+  store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+  store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+  store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+  store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+  store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+  store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+  store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+  store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+  store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+  store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+  store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
   ret void
 }
 
 define void @sub_v64i8() {
 ; SSE-LABEL: @sub_v64i8(
-; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = sub <16 x i8> [[TMP1]], [[TMP2]]
-; SSE-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
 ; SSE-NEXT:    [[TMP6:%.*]] = sub <16 x i8> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; SSE-NEXT:    [[TMP9:%.*]] = sub <16 x i8> [[TMP7]], [[TMP8]]
-; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
 ; SSE-NEXT:    [[TMP12:%.*]] = sub <16 x i8> [[TMP10]], [[TMP11]]
-; SSE-NEXT:    store <16 x i8> [[TMP9]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @sub_v64i8(
-; SLM-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
+; SLM-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = sub <16 x i8> [[TMP1]], [[TMP2]]
-; SLM-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SLM-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SLM-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
 ; SLM-NEXT:    [[TMP6:%.*]] = sub <16 x i8> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SLM-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SLM-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; SLM-NEXT:    [[TMP9:%.*]] = sub <16 x i8> [[TMP7]], [[TMP8]]
-; SLM-NEXT:    [[TMP10:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
-; SLM-NEXT:    [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
+; SLM-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SLM-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
 ; SLM-NEXT:    [[TMP12:%.*]] = sub <16 x i8> [[TMP10]], [[TMP11]]
-; SLM-NEXT:    store <16 x i8> [[TMP9]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SLM-NEXT:    store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SLM-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @sub_v64i8(
-; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @a8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @b8 to <32 x i8>*), align 1
+; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sub <32 x i8> [[TMP1]], [[TMP2]]
-; AVX-NEXT:    store <32 x i8> [[TMP3]], <32 x i8>* bitcast ([64 x i8]* @c8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP3]], ptr @c8, align 1
+; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; AVX-NEXT:    [[TMP6:%.*]] = sub <32 x i8> [[TMP4]], [[TMP5]]
-; AVX-NEXT:    store <32 x i8> [[TMP6]], <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sub_v64i8(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @a8 to <64 x i8>*), align 1
-; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @b8 to <64 x i8>*), align 1
+; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
+; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
 ; AVX512-NEXT:    [[TMP3:%.*]] = sub <64 x i8> [[TMP1]], [[TMP2]]
-; AVX512-NEXT:    store <64 x i8> [[TMP3]], <64 x i8>* bitcast ([64 x i8]* @c8 to <64 x i8>*), align 1
+; AVX512-NEXT:    store <64 x i8> [[TMP3]], ptr @c8, align 1
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
-  %a1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
-  %a2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
-  %a3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
-  %a4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
-  %a5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
-  %a6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
-  %a7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
-  %a8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
-  %a9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
-  %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-  %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-  %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-  %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-  %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-  %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-  %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-  %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-  %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-  %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-  %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-  %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-  %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-  %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-  %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-  %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-  %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-  %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-  %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-  %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-  %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-  %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-  %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-  %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-  %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-  %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-  %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-  %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-  %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-  %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-  %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-  %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-  %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-  %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-  %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-  %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-  %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-  %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-  %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-  %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-  %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-  %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-  %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-  %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-  %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-  %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-  %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-  %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-  %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-  %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-  %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-  %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-  %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-  %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-  %b0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
-  %b1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
-  %b2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
-  %b3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
-  %b4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
-  %b5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
-  %b6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
-  %b7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
-  %b8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
-  %b9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
-  %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-  %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-  %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-  %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-  %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-  %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-  %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-  %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-  %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-  %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-  %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-  %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-  %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-  %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-  %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-  %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-  %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-  %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-  %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-  %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-  %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-  %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-  %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-  %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-  %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-  %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-  %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-  %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-  %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-  %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-  %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-  %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-  %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-  %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-  %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-  %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-  %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-  %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-  %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-  %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-  %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-  %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-  %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-  %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-  %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-  %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-  %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-  %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-  %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-  %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-  %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-  %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-  %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-  %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+  %a0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+  %a1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+  %a2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+  %a3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+  %a4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+  %a5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+  %a6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+  %a7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+  %a8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+  %a9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+  %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+  %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+  %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+  %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+  %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+  %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+  %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+  %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+  %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+  %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+  %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+  %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+  %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+  %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+  %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+  %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+  %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+  %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+  %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+  %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+  %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+  %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+  %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+  %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+  %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+  %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+  %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+  %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+  %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+  %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+  %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+  %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+  %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+  %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+  %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+  %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+  %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+  %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+  %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+  %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+  %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+  %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+  %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+  %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+  %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+  %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+  %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+  %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+  %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+  %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+  %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+  %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+  %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+  %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+  %b0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+  %b1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+  %b2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+  %b3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+  %b4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+  %b5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+  %b6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+  %b7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+  %b8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+  %b9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+  %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+  %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+  %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+  %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+  %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+  %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+  %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+  %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+  %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+  %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+  %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+  %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+  %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+  %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+  %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+  %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+  %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+  %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+  %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+  %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+  %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+  %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+  %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+  %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+  %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+  %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+  %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+  %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+  %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+  %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+  %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+  %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+  %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+  %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+  %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+  %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+  %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+  %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+  %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+  %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+  %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+  %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+  %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+  %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+  %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+  %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+  %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+  %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+  %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+  %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+  %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+  %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+  %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+  %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
   %r0  = sub i8 %a0 , %b0
   %r1  = sub i8 %a1 , %b1
   %r2  = sub i8 %a2 , %b2
@@ -675,69 +675,69 @@ define void @sub_v64i8() {
   %r61 = sub i8 %a61, %b61
   %r62 = sub i8 %a62, %b62
   %r63 = sub i8 %a63, %b63
-  store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
-  store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
-  store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
-  store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
-  store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
-  store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
-  store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
-  store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
-  store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
-  store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
-  store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-  store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-  store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-  store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-  store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-  store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-  store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-  store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-  store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-  store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-  store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-  store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-  store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-  store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-  store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-  store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-  store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-  store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-  store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-  store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-  store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-  store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-  store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-  store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-  store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-  store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-  store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-  store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-  store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-  store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-  store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-  store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-  store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-  store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-  store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-  store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-  store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-  store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-  store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-  store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-  store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-  store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-  store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-  store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-  store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-  store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-  store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-  store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-  store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-  store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-  store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-  store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-  store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-  store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+  store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+  store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+  store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+  store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+  store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+  store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+  store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+  store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+  store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+  store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+  store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+  store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+  store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+  store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+  store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+  store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+  store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+  store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+  store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+  store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+  store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+  store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+  store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+  store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+  store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+  store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+  store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+  store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+  store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+  store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+  store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+  store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+  store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+  store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+  store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+  store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+  store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+  store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+  store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+  store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+  store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+  store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+  store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+  store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+  store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+  store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+  store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+  store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+  store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+  store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+  store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+  store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+  store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+  store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+  store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+  store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+  store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+  store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+  store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+  store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+  store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+  store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+  store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+  store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
   ret void
 }

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-umax.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-umax.ll
index 095020d994abb..3a187930055f0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-umax.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-umax.ll
@@ -27,58 +27,58 @@ declare i8  @llvm.umax.i8 (i8 , i8 )
 
 define void @umax_v8i64() {
 ; SSE-LABEL: @umax_v8i64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
+; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
 ; SSE-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.umax.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
-; SSE-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP3]], ptr @c64, align 8
+; SSE-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP6:%.*]] = call <2 x i64> @llvm.umax.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]])
-; SSE-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; SSE-NEXT:    [[TMP9:%.*]] = call <2 x i64> @llvm.umax.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]])
-; SSE-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP11:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
 ; SSE-NEXT:    [[TMP12:%.*]] = call <2 x i64> @llvm.umax.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]])
-; SSE-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @umax_v8i64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
 ; AVX-NEXT:    [[TMP3:%.*]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]])
-; AVX-NEXT:    store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP3]], ptr @c64, align 8
+; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX-NEXT:    [[TMP6:%.*]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]])
-; AVX-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @umax_v8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
+; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.umax.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]])
-; AVX512-NEXT:    store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
+; AVX512-NEXT:    store <8 x i64> [[TMP3]], ptr @c64, align 8
 ; AVX512-NEXT:    ret void
 ;
-  %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-  %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-  %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-  %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-  %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-  %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-  %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-  %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-  %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-  %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-  %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-  %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-  %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-  %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-  %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-  %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+  %a0 = load i64, ptr @a64, align 8
+  %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+  %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+  %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+  %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+  %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+  %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+  %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+  %b0 = load i64, ptr @b64, align 8
+  %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+  %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+  %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+  %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+  %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+  %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+  %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
   %r0 = call i64 @llvm.umax.i64(i64 %a0, i64 %b0)
   %r1 = call i64 @llvm.umax.i64(i64 %a1, i64 %b1)
   %r2 = call i64 @llvm.umax.i64(i64 %a2, i64 %b2)
@@ -87,87 +87,87 @@ define void @umax_v8i64() {
   %r5 = call i64 @llvm.umax.i64(i64 %a5, i64 %b5)
   %r6 = call i64 @llvm.umax.i64(i64 %a6, i64 %b6)
   %r7 = call i64 @llvm.umax.i64(i64 %a7, i64 %b7)
-  store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-  store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-  store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-  store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-  store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-  store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-  store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-  store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+  store i64 %r0, ptr @c64, align 8
+  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @umax_v16i32() {
 ; SSE-LABEL: @umax_v16i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-; SSE-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP3]], ptr @c32, align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]])
-; SSE-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP9:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]])
-; SSE-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP12:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]])
-; SSE-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @umax_v16i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
 ; AVX-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.umax.v8i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
-; AVX-NEXT:    store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP3]], ptr @c32, align 4
+; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; AVX-NEXT:    [[TMP6:%.*]] = call <8 x i32> @llvm.umax.v8i32(<8 x i32> [[TMP4]], <8 x i32> [[TMP5]])
-; AVX-NEXT:    store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @umax_v16i32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @b32 to <16 x i32>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <16 x i32> @llvm.umax.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
-; AVX512-NEXT:    store <16 x i32> [[TMP3]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4
+; AVX512-NEXT:    store <16 x i32> [[TMP3]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-  %b0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
-  %b1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
-  %b2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
-  %b3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
-  %b4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
-  %b5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
-  %b6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
-  %b7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
-  %b8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
-  %b9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
-  %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-  %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-  %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-  %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-  %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-  %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+  %b0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+  %b1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+  %b2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+  %b3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+  %b4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+  %b5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+  %b6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+  %b7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+  %b8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+  %b9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+  %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+  %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+  %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+  %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+  %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+  %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
   %r0  = call i32 @llvm.umax.i32(i32 %a0 , i32 %b0 )
   %r1  = call i32 @llvm.umax.i32(i32 %a1 , i32 %b1 )
   %r2  = call i32 @llvm.umax.i32(i32 %a2 , i32 %b2 )
@@ -184,127 +184,127 @@ define void @umax_v16i32() {
   %r13 = call i32 @llvm.umax.i32(i32 %a13, i32 %b13)
   %r14 = call i32 @llvm.umax.i32(i32 %a14, i32 %b14)
   %r15 = call i32 @llvm.umax.i32(i32 %a15, i32 %b15)
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @umax_v32i16() {
 ; SSE-LABEL: @umax_v32i16(
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
 ; SSE-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.umax.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-; SSE-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
 ; SSE-NEXT:    [[TMP6:%.*]] = call <8 x i16> @llvm.umax.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]])
-; SSE-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; SSE-NEXT:    [[TMP9:%.*]] = call <8 x i16> @llvm.umax.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]])
-; SSE-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
 ; SSE-NEXT:    [[TMP12:%.*]] = call <8 x i16> @llvm.umax.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]])
-; SSE-NEXT:    store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @umax_v32i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
+; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
 ; AVX-NEXT:    [[TMP3:%.*]] = call <16 x i16> @llvm.umax.v16i16(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
-; AVX-NEXT:    store <16 x i16> [[TMP3]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP3]], ptr @c16, align 2
+; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; AVX-NEXT:    [[TMP6:%.*]] = call <16 x i16> @llvm.umax.v16i16(<16 x i16> [[TMP4]], <16 x i16> [[TMP5]])
-; AVX-NEXT:    store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @umax_v32i16(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @a16 to <32 x i16>*), align 2
-; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @b16 to <32 x i16>*), align 2
+; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
+; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <32 x i16> @llvm.umax.v32i16(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
-; AVX512-NEXT:    store <32 x i16> [[TMP3]], <32 x i16>* bitcast ([32 x i16]* @c16 to <32 x i16>*), align 2
+; AVX512-NEXT:    store <32 x i16> [[TMP3]], ptr @c16, align 2
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
-  %a1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
-  %a2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
-  %a3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
-  %a4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
-  %a5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
-  %a6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
-  %a7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
-  %a8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
-  %a9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
-  %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-  %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-  %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-  %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-  %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-  %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-  %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-  %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-  %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-  %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-  %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-  %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-  %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-  %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-  %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-  %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-  %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-  %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-  %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-  %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-  %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-  %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-  %b0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
-  %b1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
-  %b2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
-  %b3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
-  %b4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
-  %b5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
-  %b6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
-  %b7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
-  %b8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
-  %b9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
-  %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-  %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-  %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-  %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-  %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-  %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-  %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-  %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-  %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-  %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-  %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-  %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-  %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-  %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-  %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-  %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-  %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-  %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-  %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-  %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-  %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-  %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+  %a0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+  %a1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+  %a2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+  %a3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+  %a4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+  %a5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+  %a6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+  %a7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+  %a8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+  %a9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+  %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+  %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+  %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+  %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+  %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+  %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+  %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+  %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+  %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+  %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+  %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+  %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+  %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+  %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+  %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+  %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+  %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+  %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+  %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+  %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+  %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+  %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+  %b0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+  %b1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+  %b2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+  %b3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+  %b4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+  %b5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+  %b6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+  %b7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+  %b8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+  %b9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+  %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+  %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+  %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+  %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+  %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+  %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+  %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+  %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+  %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+  %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+  %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+  %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+  %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+  %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+  %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+  %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+  %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+  %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+  %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+  %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+  %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+  %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
   %r0  = call i16 @llvm.umax.i16(i16 %a0 , i16 %b0 )
   %r1  = call i16 @llvm.umax.i16(i16 %a1 , i16 %b1 )
   %r2  = call i16 @llvm.umax.i16(i16 %a2 , i16 %b2 )
@@ -337,207 +337,207 @@ define void @umax_v32i16() {
   %r29 = call i16 @llvm.umax.i16(i16 %a29, i16 %b29)
   %r30 = call i16 @llvm.umax.i16(i16 %a30, i16 %b30)
   %r31 = call i16 @llvm.umax.i16(i16 %a31, i16 %b31)
-  store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
-  store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
-  store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
-  store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
-  store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
-  store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
-  store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
-  store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
-  store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
-  store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
-  store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-  store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-  store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-  store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-  store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-  store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-  store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-  store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-  store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-  store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-  store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-  store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-  store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-  store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-  store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-  store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-  store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-  store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-  store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-  store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-  store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-  store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+  store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+  store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+  store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+  store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+  store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+  store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+  store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+  store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+  store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+  store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+  store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+  store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+  store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+  store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+  store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+  store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+  store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+  store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+  store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+  store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+  store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+  store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+  store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+  store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+  store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+  store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+  store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+  store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+  store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+  store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+  store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+  store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
   ret void
 }
 
 define void @umax_v64i8() {
 ; SSE-LABEL: @umax_v64i8(
-; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.umax.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-; SSE-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
 ; SSE-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.umax.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
-; SSE-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; SSE-NEXT:    [[TMP9:%.*]] = call <16 x i8> @llvm.umax.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
-; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
 ; SSE-NEXT:    [[TMP12:%.*]] = call <16 x i8> @llvm.umax.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]])
-; SSE-NEXT:    store <16 x i8> [[TMP9]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @umax_v64i8(
-; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @a8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @b8 to <32 x i8>*), align 1
+; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = call <32 x i8> @llvm.umax.v32i8(<32 x i8> [[TMP1]], <32 x i8> [[TMP2]])
-; AVX-NEXT:    store <32 x i8> [[TMP3]], <32 x i8>* bitcast ([64 x i8]* @c8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP3]], ptr @c8, align 1
+; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; AVX-NEXT:    [[TMP6:%.*]] = call <32 x i8> @llvm.umax.v32i8(<32 x i8> [[TMP4]], <32 x i8> [[TMP5]])
-; AVX-NEXT:    store <32 x i8> [[TMP6]], <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @umax_v64i8(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @a8 to <64 x i8>*), align 1
-; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @b8 to <64 x i8>*), align 1
+; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
+; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <64 x i8> @llvm.umax.v64i8(<64 x i8> [[TMP1]], <64 x i8> [[TMP2]])
-; AVX512-NEXT:    store <64 x i8> [[TMP3]], <64 x i8>* bitcast ([64 x i8]* @c8 to <64 x i8>*), align 1
+; AVX512-NEXT:    store <64 x i8> [[TMP3]], ptr @c8, align 1
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
-  %a1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
-  %a2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
-  %a3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
-  %a4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
-  %a5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
-  %a6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
-  %a7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
-  %a8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
-  %a9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
-  %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-  %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-  %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-  %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-  %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-  %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-  %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-  %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-  %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-  %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-  %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-  %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-  %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-  %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-  %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-  %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-  %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-  %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-  %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-  %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-  %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-  %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-  %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-  %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-  %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-  %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-  %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-  %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-  %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-  %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-  %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-  %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-  %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-  %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-  %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-  %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-  %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-  %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-  %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-  %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-  %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-  %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-  %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-  %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-  %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-  %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-  %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-  %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-  %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-  %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-  %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-  %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-  %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-  %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-  %b0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
-  %b1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
-  %b2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
-  %b3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
-  %b4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
-  %b5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
-  %b6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
-  %b7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
-  %b8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
-  %b9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
-  %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-  %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-  %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-  %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-  %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-  %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-  %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-  %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-  %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-  %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-  %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-  %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-  %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-  %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-  %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-  %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-  %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-  %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-  %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-  %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-  %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-  %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-  %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-  %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-  %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-  %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-  %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-  %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-  %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-  %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-  %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-  %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-  %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-  %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-  %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-  %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-  %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-  %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-  %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-  %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-  %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-  %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-  %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-  %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-  %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-  %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-  %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-  %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-  %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-  %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-  %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-  %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-  %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-  %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+  %a0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+  %a1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+  %a2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+  %a3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+  %a4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+  %a5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+  %a6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+  %a7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+  %a8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+  %a9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+  %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+  %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+  %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+  %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+  %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+  %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+  %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+  %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+  %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+  %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+  %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+  %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+  %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+  %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+  %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+  %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+  %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+  %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+  %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+  %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+  %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+  %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+  %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+  %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+  %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+  %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+  %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+  %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+  %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+  %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+  %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+  %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+  %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+  %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+  %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+  %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+  %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+  %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+  %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+  %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+  %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+  %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+  %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+  %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+  %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+  %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+  %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+  %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+  %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+  %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+  %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+  %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+  %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+  %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+  %b0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+  %b1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+  %b2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+  %b3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+  %b4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+  %b5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+  %b6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+  %b7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+  %b8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+  %b9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+  %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+  %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+  %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+  %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+  %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+  %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+  %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+  %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+  %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+  %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+  %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+  %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+  %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+  %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+  %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+  %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+  %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+  %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+  %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+  %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+  %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+  %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+  %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+  %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+  %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+  %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+  %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+  %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+  %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+  %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+  %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+  %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+  %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+  %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+  %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+  %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+  %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+  %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+  %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+  %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+  %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+  %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+  %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+  %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+  %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+  %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+  %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+  %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+  %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+  %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+  %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+  %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+  %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+  %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
   %r0  = call i8 @llvm.umax.i8(i8 %a0 , i8 %b0 )
   %r1  = call i8 @llvm.umax.i8(i8 %a1 , i8 %b1 )
   %r2  = call i8 @llvm.umax.i8(i8 %a2 , i8 %b2 )
@@ -602,69 +602,69 @@ define void @umax_v64i8() {
   %r61 = call i8 @llvm.umax.i8(i8 %a61, i8 %b61)
   %r62 = call i8 @llvm.umax.i8(i8 %a62, i8 %b62)
   %r63 = call i8 @llvm.umax.i8(i8 %a63, i8 %b63)
-  store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
-  store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
-  store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
-  store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
-  store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
-  store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
-  store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
-  store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
-  store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
-  store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
-  store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-  store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-  store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-  store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-  store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-  store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-  store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-  store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-  store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-  store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-  store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-  store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-  store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-  store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-  store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-  store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-  store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-  store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-  store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-  store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-  store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-  store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-  store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-  store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-  store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-  store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-  store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-  store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-  store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-  store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-  store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-  store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-  store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-  store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-  store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-  store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-  store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-  store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-  store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-  store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-  store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-  store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-  store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-  store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-  store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-  store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-  store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-  store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-  store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-  store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-  store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-  store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-  store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-  store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+  store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+  store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+  store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+  store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+  store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+  store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+  store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+  store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+  store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+  store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+  store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+  store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+  store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+  store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+  store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+  store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+  store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+  store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+  store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+  store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+  store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+  store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+  store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+  store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+  store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+  store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+  store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+  store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+  store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+  store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+  store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+  store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+  store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+  store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+  store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+  store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+  store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+  store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+  store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+  store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+  store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+  store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+  store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+  store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+  store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+  store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+  store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+  store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+  store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+  store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+  store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+  store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+  store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+  store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+  store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+  store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+  store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+  store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+  store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+  store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+  store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+  store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+  store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+  store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-umin.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-umin.ll
index 661f5b90262b6..15119a9628067 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-umin.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-umin.ll
@@ -27,58 +27,58 @@ declare i8  @llvm.umin.i8 (i8 , i8 )
 
 define void @umin_v8i64() {
 ; SSE-LABEL: @umin_v8i64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
+; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
 ; SSE-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.umin.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
-; SSE-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP3]], ptr @c64, align 8
+; SSE-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP6:%.*]] = call <2 x i64> @llvm.umin.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]])
-; SSE-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; SSE-NEXT:    [[TMP9:%.*]] = call <2 x i64> @llvm.umin.v2i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]])
-; SSE-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP11:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
 ; SSE-NEXT:    [[TMP12:%.*]] = call <2 x i64> @llvm.umin.v2i64(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]])
-; SSE-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @umin_v8i64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
 ; AVX-NEXT:    [[TMP3:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]])
-; AVX-NEXT:    store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP3]], ptr @c64, align 8
+; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX-NEXT:    [[TMP6:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]])
-; AVX-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @umin_v8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
+; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.umin.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]])
-; AVX512-NEXT:    store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
+; AVX512-NEXT:    store <8 x i64> [[TMP3]], ptr @c64, align 8
 ; AVX512-NEXT:    ret void
 ;
-  %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-  %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-  %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-  %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-  %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-  %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-  %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-  %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-  %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-  %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-  %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-  %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-  %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-  %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-  %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-  %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+  %a0 = load i64, ptr @a64, align 8
+  %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+  %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+  %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+  %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+  %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+  %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+  %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+  %b0 = load i64, ptr @b64, align 8
+  %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+  %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+  %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+  %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+  %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+  %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+  %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
   %r0 = call i64 @llvm.umin.i64(i64 %a0, i64 %b0)
   %r1 = call i64 @llvm.umin.i64(i64 %a1, i64 %b1)
   %r2 = call i64 @llvm.umin.i64(i64 %a2, i64 %b2)
@@ -87,87 +87,87 @@ define void @umin_v8i64() {
   %r5 = call i64 @llvm.umin.i64(i64 %a5, i64 %b5)
   %r6 = call i64 @llvm.umin.i64(i64 %a6, i64 %b6)
   %r7 = call i64 @llvm.umin.i64(i64 %a7, i64 %b7)
-  store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-  store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-  store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-  store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-  store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-  store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-  store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-  store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+  store i64 %r0, ptr @c64, align 8
+  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @umin_v16i32() {
 ; SSE-LABEL: @umin_v16i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
-; SSE-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP3]], ptr @c32, align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]])
-; SSE-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP9:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]])
-; SSE-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP12:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]])
-; SSE-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @umin_v16i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
 ; AVX-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.umin.v8i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
-; AVX-NEXT:    store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP3]], ptr @c32, align 4
+; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; AVX-NEXT:    [[TMP6:%.*]] = call <8 x i32> @llvm.umin.v8i32(<8 x i32> [[TMP4]], <8 x i32> [[TMP5]])
-; AVX-NEXT:    store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @umin_v16i32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @b32 to <16 x i32>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
-; AVX512-NEXT:    store <16 x i32> [[TMP3]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4
+; AVX512-NEXT:    store <16 x i32> [[TMP3]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-  %b0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
-  %b1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
-  %b2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
-  %b3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
-  %b4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
-  %b5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
-  %b6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
-  %b7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
-  %b8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
-  %b9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
-  %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-  %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-  %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-  %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-  %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-  %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+  %b0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+  %b1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+  %b2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+  %b3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+  %b4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+  %b5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+  %b6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+  %b7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+  %b8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+  %b9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+  %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+  %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+  %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+  %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+  %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+  %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
   %r0  = call i32 @llvm.umin.i32(i32 %a0 , i32 %b0 )
   %r1  = call i32 @llvm.umin.i32(i32 %a1 , i32 %b1 )
   %r2  = call i32 @llvm.umin.i32(i32 %a2 , i32 %b2 )
@@ -184,127 +184,127 @@ define void @umin_v16i32() {
   %r13 = call i32 @llvm.umin.i32(i32 %a13, i32 %b13)
   %r14 = call i32 @llvm.umin.i32(i32 %a14, i32 %b14)
   %r15 = call i32 @llvm.umin.i32(i32 %a15, i32 %b15)
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @umin_v32i16() {
 ; SSE-LABEL: @umin_v32i16(
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
 ; SSE-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.umin.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
-; SSE-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
 ; SSE-NEXT:    [[TMP6:%.*]] = call <8 x i16> @llvm.umin.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]])
-; SSE-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; SSE-NEXT:    [[TMP9:%.*]] = call <8 x i16> @llvm.umin.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]])
-; SSE-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
 ; SSE-NEXT:    [[TMP12:%.*]] = call <8 x i16> @llvm.umin.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]])
-; SSE-NEXT:    store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @umin_v32i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
+; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
 ; AVX-NEXT:    [[TMP3:%.*]] = call <16 x i16> @llvm.umin.v16i16(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
-; AVX-NEXT:    store <16 x i16> [[TMP3]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP3]], ptr @c16, align 2
+; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; AVX-NEXT:    [[TMP6:%.*]] = call <16 x i16> @llvm.umin.v16i16(<16 x i16> [[TMP4]], <16 x i16> [[TMP5]])
-; AVX-NEXT:    store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @umin_v32i16(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @a16 to <32 x i16>*), align 2
-; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @b16 to <32 x i16>*), align 2
+; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
+; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <32 x i16> @llvm.umin.v32i16(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
-; AVX512-NEXT:    store <32 x i16> [[TMP3]], <32 x i16>* bitcast ([32 x i16]* @c16 to <32 x i16>*), align 2
+; AVX512-NEXT:    store <32 x i16> [[TMP3]], ptr @c16, align 2
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
-  %a1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
-  %a2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
-  %a3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
-  %a4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
-  %a5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
-  %a6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
-  %a7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
-  %a8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
-  %a9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
-  %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-  %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-  %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-  %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-  %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-  %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-  %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-  %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-  %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-  %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-  %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-  %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-  %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-  %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-  %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-  %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-  %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-  %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-  %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-  %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-  %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-  %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-  %b0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
-  %b1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
-  %b2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
-  %b3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
-  %b4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
-  %b5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
-  %b6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
-  %b7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
-  %b8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
-  %b9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
-  %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-  %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-  %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-  %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-  %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-  %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-  %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-  %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-  %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-  %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-  %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-  %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-  %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-  %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-  %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-  %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-  %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-  %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-  %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-  %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-  %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-  %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+  %a0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+  %a1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+  %a2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+  %a3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+  %a4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+  %a5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+  %a6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+  %a7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+  %a8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+  %a9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+  %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+  %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+  %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+  %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+  %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+  %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+  %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+  %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+  %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+  %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+  %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+  %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+  %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+  %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+  %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+  %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+  %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+  %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+  %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+  %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+  %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+  %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+  %b0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+  %b1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+  %b2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+  %b3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+  %b4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+  %b5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+  %b6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+  %b7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+  %b8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+  %b9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+  %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+  %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+  %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+  %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+  %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+  %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+  %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+  %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+  %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+  %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+  %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+  %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+  %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+  %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+  %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+  %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+  %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+  %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+  %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+  %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+  %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+  %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
   %r0  = call i16 @llvm.umin.i16(i16 %a0 , i16 %b0 )
   %r1  = call i16 @llvm.umin.i16(i16 %a1 , i16 %b1 )
   %r2  = call i16 @llvm.umin.i16(i16 %a2 , i16 %b2 )
@@ -337,207 +337,207 @@ define void @umin_v32i16() {
   %r29 = call i16 @llvm.umin.i16(i16 %a29, i16 %b29)
   %r30 = call i16 @llvm.umin.i16(i16 %a30, i16 %b30)
   %r31 = call i16 @llvm.umin.i16(i16 %a31, i16 %b31)
-  store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
-  store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
-  store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
-  store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
-  store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
-  store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
-  store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
-  store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
-  store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
-  store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
-  store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-  store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-  store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-  store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-  store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-  store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-  store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-  store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-  store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-  store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-  store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-  store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-  store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-  store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-  store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-  store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-  store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-  store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-  store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-  store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-  store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-  store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+  store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+  store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+  store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+  store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+  store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+  store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+  store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+  store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+  store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+  store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+  store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+  store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+  store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+  store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+  store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+  store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+  store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+  store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+  store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+  store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+  store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+  store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+  store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+  store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+  store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+  store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+  store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+  store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+  store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+  store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+  store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+  store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
   ret void
 }
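
One wrinkle worth calling out in the hunks above and below: getelementptr keeps an explicit source element type even under opaque pointers, because the index arithmetic still needs a type to scale by; only the pointer operand and the result lose their pointee types. A sketch, again with a hypothetical global @g:

  ; [32 x i16] supplies the scale for the i64 8 index (8 x 2 bytes = offset 16);
  ; both the pointer operand and the result are plain ptr.
  %p = getelementptr inbounds [32 x i16], ptr @g, i32 0, i64 8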
 
 define void @umin_v64i8() {
 ; SSE-LABEL: @umin_v64i8(
-; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
-; SSE-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
 ; SSE-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
-; SSE-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; SSE-NEXT:    [[TMP9:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
-; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
 ; SSE-NEXT:    [[TMP12:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]])
-; SSE-NEXT:    store <16 x i8> [[TMP9]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @umin_v64i8(
-; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @a8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @b8 to <32 x i8>*), align 1
+; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[TMP1]], <32 x i8> [[TMP2]])
-; AVX-NEXT:    store <32 x i8> [[TMP3]], <32 x i8>* bitcast ([64 x i8]* @c8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP3]], ptr @c8, align 1
+; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; AVX-NEXT:    [[TMP6:%.*]] = call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[TMP4]], <32 x i8> [[TMP5]])
-; AVX-NEXT:    store <32 x i8> [[TMP6]], <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @umin_v64i8(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @a8 to <64 x i8>*), align 1
-; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @b8 to <64 x i8>*), align 1
+; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
+; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <64 x i8> @llvm.umin.v64i8(<64 x i8> [[TMP1]], <64 x i8> [[TMP2]])
-; AVX512-NEXT:    store <64 x i8> [[TMP3]], <64 x i8>* bitcast ([64 x i8]* @c8 to <64 x i8>*), align 1
+; AVX512-NEXT:    store <64 x i8> [[TMP3]], ptr @c8, align 1
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
-  %a1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
-  %a2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
-  %a3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
-  %a4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
-  %a5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
-  %a6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
-  %a7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
-  %a8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
-  %a9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
-  %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-  %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-  %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-  %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-  %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-  %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-  %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-  %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-  %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-  %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-  %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-  %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-  %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-  %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-  %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-  %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-  %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-  %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-  %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-  %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-  %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-  %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-  %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-  %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-  %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-  %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-  %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-  %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-  %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-  %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-  %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-  %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-  %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-  %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-  %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-  %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-  %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-  %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-  %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-  %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-  %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-  %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-  %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-  %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-  %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-  %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-  %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-  %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-  %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-  %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-  %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-  %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-  %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-  %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-  %b0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
-  %b1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
-  %b2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
-  %b3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
-  %b4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
-  %b5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
-  %b6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
-  %b7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
-  %b8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
-  %b9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
-  %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-  %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-  %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-  %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-  %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-  %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-  %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-  %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-  %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-  %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-  %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-  %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-  %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-  %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-  %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-  %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-  %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-  %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-  %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-  %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-  %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-  %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-  %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-  %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-  %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-  %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-  %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-  %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-  %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-  %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-  %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-  %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-  %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-  %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-  %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-  %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-  %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-  %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-  %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-  %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-  %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-  %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-  %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-  %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-  %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-  %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-  %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-  %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-  %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-  %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-  %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-  %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-  %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-  %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+  %a0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+  %a1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+  %a2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+  %a3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+  %a4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+  %a5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+  %a6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+  %a7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+  %a8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+  %a9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+  %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+  %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+  %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+  %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+  %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+  %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+  %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+  %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+  %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+  %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+  %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+  %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+  %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+  %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+  %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+  %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+  %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+  %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+  %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+  %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+  %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+  %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+  %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+  %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+  %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+  %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+  %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+  %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+  %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+  %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+  %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+  %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+  %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+  %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+  %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+  %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+  %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+  %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+  %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+  %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+  %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+  %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+  %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+  %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+  %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+  %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+  %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+  %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+  %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+  %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+  %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+  %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+  %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+  %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+  %b0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+  %b1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+  %b2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+  %b3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+  %b4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+  %b5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+  %b6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+  %b7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+  %b8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+  %b9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+  %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+  %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+  %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+  %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+  %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+  %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+  %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+  %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+  %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+  %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+  %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+  %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+  %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+  %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+  %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+  %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+  %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+  %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+  %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+  %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+  %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+  %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+  %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+  %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+  %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+  %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+  %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+  %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+  %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+  %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+  %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+  %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+  %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+  %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+  %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+  %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+  %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+  %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+  %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+  %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+  %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+  %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+  %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+  %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+  %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+  %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+  %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+  %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+  %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+  %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+  %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+  %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+  %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+  %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
   %r0  = call i8 @llvm.umin.i8(i8 %a0 , i8 %b0 )
   %r1  = call i8 @llvm.umin.i8(i8 %a1 , i8 %b1 )
   %r2  = call i8 @llvm.umin.i8(i8 %a2 , i8 %b2 )
@@ -602,69 +602,69 @@ define void @umin_v64i8() {
   %r61 = call i8 @llvm.umin.i8(i8 %a61, i8 %b61)
   %r62 = call i8 @llvm.umin.i8(i8 %a62, i8 %b62)
   %r63 = call i8 @llvm.umin.i8(i8 %a63, i8 %b63)
-  store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
-  store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
-  store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
-  store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
-  store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
-  store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
-  store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
-  store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
-  store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
-  store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
-  store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-  store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-  store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-  store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-  store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-  store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-  store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-  store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-  store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-  store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-  store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-  store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-  store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-  store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-  store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-  store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-  store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-  store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-  store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-  store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-  store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-  store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-  store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-  store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-  store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-  store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-  store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-  store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-  store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-  store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-  store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-  store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-  store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-  store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-  store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-  store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-  store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-  store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-  store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-  store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-  store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-  store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-  store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-  store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-  store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-  store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-  store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-  store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-  store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-  store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-  store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-  store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-  store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-  store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+  store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+  store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+  store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+  store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+  store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+  store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+  store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+  store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+  store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+  store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+  store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+  store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+  store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+  store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+  store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+  store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+  store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+  store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+  store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+  store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+  store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+  store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+  store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+  store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+  store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+  store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+  store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+  store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+  store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+  store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+  store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+  store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+  store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+  store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+  store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+  store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+  store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+  store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+  store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+  store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+  store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+  store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+  store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+  store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+  store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+  store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+  store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+  store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+  store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+  store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+  store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+  store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+  store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+  store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+  store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+  store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+  store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+  store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+  store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+  store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+  store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+  store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+  store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+  store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
   ret void
 }

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/atomics.ll b/llvm/test/Transforms/SLPVectorizer/X86/atomics.ll
index 8510803ec48cd..92edd5d753d05 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/atomics.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/atomics.ll
@@ -11,25 +11,25 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 define void @test() {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    store i32 0, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 0), align 16
-; CHECK-NEXT:    store atomic i32 0, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @x, i64 0, i64 0) release, align 16
-; CHECK-NEXT:    store i32 0, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 1), align 4
-; CHECK-NEXT:    store atomic i32 1, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @x, i64 0, i64 1) release, align 4
-; CHECK-NEXT:    store i32 0, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 2), align 8
-; CHECK-NEXT:    store atomic i32 2, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @x, i64 0, i64 2) release, align 8
-; CHECK-NEXT:    store i32 0, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 3), align 4
-; CHECK-NEXT:    store atomic i32 3, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @x, i64 0, i64 3) release, align 4
+; CHECK-NEXT:    store i32 0, ptr @a, align 16
+; CHECK-NEXT:    store atomic i32 0, ptr @x release, align 16
+; CHECK-NEXT:    store i32 0, ptr getelementptr inbounds ([4 x i32], ptr @a, i64 0, i64 1), align 4
+; CHECK-NEXT:    store atomic i32 1, ptr getelementptr inbounds ([4 x i32], ptr @x, i64 0, i64 1) release, align 4
+; CHECK-NEXT:    store i32 0, ptr getelementptr inbounds ([4 x i32], ptr @a, i64 0, i64 2), align 8
+; CHECK-NEXT:    store atomic i32 2, ptr getelementptr inbounds ([4 x i32], ptr @x, i64 0, i64 2) release, align 8
+; CHECK-NEXT:    store i32 0, ptr getelementptr inbounds ([4 x i32], ptr @a, i64 0, i64 3), align 4
+; CHECK-NEXT:    store atomic i32 3, ptr getelementptr inbounds ([4 x i32], ptr @x, i64 0, i64 3) release, align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  store i32 0, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 0), align 16
-  store atomic i32 0, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @x, i64 0, i64 0) release, align 16
-  store i32 0, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 1), align 4
-  store atomic i32 1, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @x, i64 0, i64 1) release, align 4
-  store i32 0, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 2), align 8
-  store atomic i32 2, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @x, i64 0, i64 2) release, align 8
-  store i32 0, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 3), align 4
-  store atomic i32 3, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @x, i64 0, i64 3) release, align 4
+  store i32 0, ptr @a, align 16
+  store atomic i32 0, ptr @x release, align 16
+  store i32 0, ptr getelementptr inbounds ([4 x i32], ptr @a, i64 0, i64 1), align 4
+  store atomic i32 1, ptr getelementptr inbounds ([4 x i32], ptr @x, i64 0, i64 1) release, align 4
+  store i32 0, ptr getelementptr inbounds ([4 x i32], ptr @a, i64 0, i64 2), align 8
+  store atomic i32 2, ptr getelementptr inbounds ([4 x i32], ptr @x, i64 0, i64 2) release, align 8
+  store i32 0, ptr getelementptr inbounds ([4 x i32], ptr @a, i64 0, i64 3), align 4
+  store atomic i32 3, ptr getelementptr inbounds ([4 x i32], ptr @x, i64 0, i64 3) release, align 4
   ret void
 }
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/bad_types.ll b/llvm/test/Transforms/SLPVectorizer/X86/bad_types.ll
index 1c12ab3cc8b7d..7476ddb0fb873 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/bad_types.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bad_types.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-define void @test1(x86_mmx %a, x86_mmx %b, i64* %ptr) {
+define void @test1(x86_mmx %a, x86_mmx %b, ptr %ptr) {
 ; Ensure we can handle x86_mmx values which are primitive and can be bitcast
 ; with integer types but can't be put into a vector.
 ;
@@ -14,9 +14,9 @@ define void @test1(x86_mmx %a, x86_mmx %b, i64* %ptr) {
 ; CHECK-NEXT:    [[B_CAST:%.*]] = bitcast x86_mmx [[B:%.*]] to i64
 ; CHECK-NEXT:    [[A_AND:%.*]] = and i64 [[A_CAST]], 42
 ; CHECK-NEXT:    [[B_AND:%.*]] = and i64 [[B_CAST]], 42
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i64, i64* [[PTR:%.*]], i32 1
-; CHECK-NEXT:    store i64 [[A_AND]], i64* [[PTR]], align 8
-; CHECK-NEXT:    store i64 [[B_AND]], i64* [[GEP]], align 8
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[PTR:%.*]], i32 1
+; CHECK-NEXT:    store i64 [[A_AND]], ptr [[PTR]], align 8
+; CHECK-NEXT:    store i64 [[B_AND]], ptr [[GEP]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -24,9 +24,9 @@ entry:
   %b.cast = bitcast x86_mmx %b to i64
   %a.and = and i64 %a.cast, 42
   %b.and = and i64 %b.cast, 42
-  %gep = getelementptr i64, i64* %ptr, i32 1
-  store i64 %a.and, i64* %ptr
-  store i64 %b.and, i64* %gep
+  %gep = getelementptr i64, ptr %ptr, i32 1
+  store i64 %a.and, ptr %ptr
+  store i64 %b.and, ptr %gep
   ret void
 }
 
@@ -66,20 +66,18 @@ exit:
   ret void
 }
 
-define i8 @test3(i8 *%addr) {
+define i8 @test3(ptr %addr) {
 ; Check that we do not vectorize types that are padded to bigger ones.
 ;
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A:%.*]] = bitcast i8* [[ADDR:%.*]] to i2*
-; CHECK-NEXT:    [[A0:%.*]] = getelementptr inbounds i2, i2* [[A]], i64 0
-; CHECK-NEXT:    [[A1:%.*]] = getelementptr inbounds i2, i2* [[A]], i64 1
-; CHECK-NEXT:    [[A2:%.*]] = getelementptr inbounds i2, i2* [[A]], i64 2
-; CHECK-NEXT:    [[A3:%.*]] = getelementptr inbounds i2, i2* [[A]], i64 3
-; CHECK-NEXT:    [[L0:%.*]] = load i2, i2* [[A0]], align 1
-; CHECK-NEXT:    [[L1:%.*]] = load i2, i2* [[A1]], align 1
-; CHECK-NEXT:    [[L2:%.*]] = load i2, i2* [[A2]], align 1
-; CHECK-NEXT:    [[L3:%.*]] = load i2, i2* [[A3]], align 1
+; CHECK-NEXT:    [[A1:%.*]] = getelementptr inbounds i2, ptr [[ADDR:%.*]], i64 1
+; CHECK-NEXT:    [[A2:%.*]] = getelementptr inbounds i2, ptr [[ADDR]], i64 2
+; CHECK-NEXT:    [[A3:%.*]] = getelementptr inbounds i2, ptr [[ADDR]], i64 3
+; CHECK-NEXT:    [[L0:%.*]] = load i2, ptr [[ADDR]], align 1
+; CHECK-NEXT:    [[L1:%.*]] = load i2, ptr [[A1]], align 1
+; CHECK-NEXT:    [[L2:%.*]] = load i2, ptr [[A2]], align 1
+; CHECK-NEXT:    [[L3:%.*]] = load i2, ptr [[A3]], align 1
 ; CHECK-NEXT:    br label [[BB1:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[P0:%.*]] = phi i2 [ [[L0]], [[ENTRY:%.*]] ]
@@ -90,15 +88,13 @@ define i8 @test3(i8 *%addr) {
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
 entry:
-  %a = bitcast i8* %addr to i2*
-  %a0 = getelementptr inbounds i2, i2* %a, i64 0
-  %a1 = getelementptr inbounds i2, i2* %a, i64 1
-  %a2 = getelementptr inbounds i2, i2* %a, i64 2
-  %a3 = getelementptr inbounds i2, i2* %a, i64 3
-  %l0 = load i2, i2* %a0, align 1
-  %l1 = load i2, i2* %a1, align 1
-  %l2 = load i2, i2* %a2, align 1
-  %l3 = load i2, i2* %a3, align 1
+  %a1 = getelementptr inbounds i2, ptr %addr, i64 1
+  %a2 = getelementptr inbounds i2, ptr %addr, i64 2
+  %a3 = getelementptr inbounds i2, ptr %addr, i64 3
+  %l0 = load i2, ptr %addr, align 1
+  %l1 = load i2, ptr %a1, align 1
+  %l2 = load i2, ptr %a2, align 1
+  %l3 = load i2, ptr %a3, align 1
   br label %bb1
 bb1:                                              ; preds = %entry
   %p0 = phi i2 [ %l0, %entry ]
@@ -111,29 +107,29 @@ bb1:                                              ; preds = %entry
 
 declare void @f(i64, i64)
 
-define void @test4(i32 %a, i28* %ptr) {
+define void @test4(i32 %a, ptr %ptr) {
 ; Check that we do not vectorize types that are padded to bigger ones.
 ;
 ; CHECK-LABEL: @test4(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[A:%.*]] to i28
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr i28, i28* [[PTR:%.*]], i32 1
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i28, i28* [[PTR]], i32 2
-; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr i28, i28* [[PTR]], i32 3
-; CHECK-NEXT:    store i28 [[TRUNC]], i28* [[PTR]], align 4
-; CHECK-NEXT:    store i28 [[TRUNC]], i28* [[GEP1]], align 4
-; CHECK-NEXT:    store i28 [[TRUNC]], i28* [[GEP2]], align 4
-; CHECK-NEXT:    store i28 [[TRUNC]], i28* [[GEP3]], align 4
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr i28, ptr [[PTR:%.*]], i32 1
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i28, ptr [[PTR]], i32 2
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr i28, ptr [[PTR]], i32 3
+; CHECK-NEXT:    store i28 [[TRUNC]], ptr [[PTR]], align 4
+; CHECK-NEXT:    store i28 [[TRUNC]], ptr [[GEP1]], align 4
+; CHECK-NEXT:    store i28 [[TRUNC]], ptr [[GEP2]], align 4
+; CHECK-NEXT:    store i28 [[TRUNC]], ptr [[GEP3]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
   %trunc = trunc i32 %a to i28
-  %gep1 = getelementptr i28, i28* %ptr, i32 1
-  %gep2 = getelementptr i28, i28* %ptr, i32 2
-  %gep3 = getelementptr i28, i28* %ptr, i32 3
-  store i28 %trunc, i28* %ptr
-  store i28 %trunc, i28* %gep1
-  store i28 %trunc, i28* %gep2
-  store i28 %trunc, i28* %gep3
+  %gep1 = getelementptr i28, ptr %ptr, i32 1
+  %gep2 = getelementptr i28, ptr %ptr, i32 2
+  %gep3 = getelementptr i28, ptr %ptr, i32 3
+  store i28 %trunc, ptr %ptr
+  store i28 %trunc, ptr %gep1
+  store i28 %trunc, ptr %gep2
+  store i28 %trunc, ptr %gep3
   ret void
 }

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
index ac10e263ca69b..d388fd17925a1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-define i32 @foo(i32* nocapture %A, i32 %n) {
+define i32 @foo(ptr nocapture %A, i32 %n) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 (...) @bar()
@@ -14,27 +14,26 @@ define i32 @foo(i32* nocapture %A, i32 %n) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], <i32 9, i32 9, i32 9, i32 9>
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    ret i32 undef
 ;
 entry:
   %call = tail call i32 (...) @bar() #2
   %mul = mul nsw i32 %n, 5
   %add = add nsw i32 %mul, 9
-  store i32 %add, i32* %A, align 4
+  store i32 %add, ptr %A, align 4
   %mul1 = mul nsw i32 %n, 9
   %add2 = add nsw i32 %mul1, 9
-  %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 1
-  store i32 %add2, i32* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 1
+  store i32 %add2, ptr %arrayidx3, align 4
   %mul4 = shl i32 %n, 3
   %add5 = add nsw i32 %mul4, 9
-  %arrayidx6 = getelementptr inbounds i32, i32* %A, i64 2
-  store i32 %add5, i32* %arrayidx6, align 4
+  %arrayidx6 = getelementptr inbounds i32, ptr %A, i64 2
+  store i32 %add5, ptr %arrayidx6, align 4
   %mul7 = mul nsw i32 %n, 10
   %add8 = add nsw i32 %mul7, 9
-  %arrayidx9 = getelementptr inbounds i32, i32* %A, i64 3
-  store i32 %add8, i32* %arrayidx9, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %A, i64 3
+  store i32 %add8, ptr %arrayidx9, align 4
   ret i32 undef
 }
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/bitreverse.ll b/llvm/test/Transforms/SLPVectorizer/X86/bitreverse.ll
index c93ad4b80c73c..8311e75572d60 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/bitreverse.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bitreverse.ll
@@ -23,109 +23,109 @@ declare  i8 @llvm.bitreverse.i8(i8)
 
 define void @bitreverse_2i64() #0 {
 ; CHECK-LABEL: @bitreverse_2i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[TMP1]])
-; CHECK-NEXT:    store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 8
+; CHECK-NEXT:    store <2 x i64> [[TMP2]], ptr @dst64, align 8
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-  %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+  %ld0 = load i64, ptr @src64, align 8
+  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
   %bitreverse0 = call i64 @llvm.bitreverse.i64(i64 %ld0)
   %bitreverse1 = call i64 @llvm.bitreverse.i64(i64 %ld1)
-  store i64 %bitreverse0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-  store i64 %bitreverse1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+  store i64 %bitreverse0, ptr @dst64, align 8
+  store i64 %bitreverse1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @bitreverse_4i64() #0 {
 ; SSE-LABEL: @bitreverse_4i64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 4
 ; SSE-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[TMP1]])
-; SSE-NEXT:    store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 4
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2) to <2 x i64>*), align 4
+; SSE-NEXT:    store <2 x i64> [[TMP2]], ptr @dst64, align 4
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[TMP3]])
-; SSE-NEXT:    store <2 x i64> [[TMP4]], <2 x i64>* bitcast (i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2) to <2 x i64>*), align 4
+; SSE-NEXT:    store <2 x i64> [[TMP4]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @bitreverse_4i64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([4 x i64]* @src64 to <4 x i64>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> [[TMP1]])
-; AVX-NEXT:    store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([4 x i64]* @dst64 to <4 x i64>*), align 4
+; AVX-NEXT:    store <4 x i64> [[TMP2]], ptr @dst64, align 4
 ; AVX-NEXT:    ret void
 ;
 ; XOP-LABEL: @bitreverse_4i64(
-; XOP-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([4 x i64]* @src64 to <4 x i64>*), align 4
+; XOP-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 4
 ; XOP-NEXT:    [[TMP2:%.*]] = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> [[TMP1]])
-; XOP-NEXT:    store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([4 x i64]* @dst64 to <4 x i64>*), align 4
+; XOP-NEXT:    store <4 x i64> [[TMP2]], ptr @dst64, align 4
 ; XOP-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
-  %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
-  %ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
-  %ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+  %ld0 = load i64, ptr @src64, align 4
+  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
+  %ld2 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
+  %ld3 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
   %bitreverse0 = call i64 @llvm.bitreverse.i64(i64 %ld0)
   %bitreverse1 = call i64 @llvm.bitreverse.i64(i64 %ld1)
   %bitreverse2 = call i64 @llvm.bitreverse.i64(i64 %ld2)
   %bitreverse3 = call i64 @llvm.bitreverse.i64(i64 %ld3)
-  store i64 %bitreverse0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
-  store i64 %bitreverse1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
-  store i64 %bitreverse2, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
-  store i64 %bitreverse3, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+  store i64 %bitreverse0, ptr @dst64, align 4
+  store i64 %bitreverse1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
+  store i64 %bitreverse2, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
+  store i64 %bitreverse3, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
   ret void
 }
 
 define void @bitreverse_4i32() #0 {
 ; CHECK-LABEL: @bitreverse_4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> [[TMP1]])
-; CHECK-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 4
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
-  %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
-  %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
-  %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+  %ld0 = load i32, ptr @src32, align 4
+  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
+  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
   %bitreverse0 = call i32 @llvm.bitreverse.i32(i32 %ld0)
   %bitreverse1 = call i32 @llvm.bitreverse.i32(i32 %ld1)
   %bitreverse2 = call i32 @llvm.bitreverse.i32(i32 %ld2)
   %bitreverse3 = call i32 @llvm.bitreverse.i32(i32 %ld3)
-  store i32 %bitreverse0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
-  store i32 %bitreverse1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
-  store i32 %bitreverse2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
-  store i32 %bitreverse3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+  store i32 %bitreverse0, ptr @dst32, align 4
+  store i32 %bitreverse1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
+  store i32 %bitreverse2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
+  store i32 %bitreverse3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @bitreverse_8i32() #0 {
 ; SSE-LABEL: @bitreverse_8i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 2
 ; SSE-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> [[TMP1]])
-; SSE-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 2
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 2
+; SSE-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 2
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
 ; SSE-NEXT:    [[TMP4:%.*]] = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> [[TMP3]])
-; SSE-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 2
+; SSE-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @bitreverse_8i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
 ; AVX-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> [[TMP1]])
-; AVX-NEXT:    store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
+; AVX-NEXT:    store <8 x i32> [[TMP2]], ptr @dst32, align 2
 ; AVX-NEXT:    ret void
 ;
 ; XOP-LABEL: @bitreverse_8i32(
-; XOP-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
+; XOP-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
 ; XOP-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> [[TMP1]])
-; XOP-NEXT:    store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
+; XOP-NEXT:    store <8 x i32> [[TMP2]], ptr @dst32, align 2
 ; XOP-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
-  %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
-  %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
-  %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
-  %ld4 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
-  %ld5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
-  %ld6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
-  %ld7 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+  %ld0 = load i32, ptr @src32, align 2
+  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
+  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
+  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
+  %ld4 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
+  %ld5 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
+  %ld6 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
+  %ld7 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
   %bitreverse0 = call i32 @llvm.bitreverse.i32(i32 %ld0)
   %bitreverse1 = call i32 @llvm.bitreverse.i32(i32 %ld1)
   %bitreverse2 = call i32 @llvm.bitreverse.i32(i32 %ld2)
@@ -134,32 +134,32 @@ define void @bitreverse_8i32() #0 {
   %bitreverse5 = call i32 @llvm.bitreverse.i32(i32 %ld5)
   %bitreverse6 = call i32 @llvm.bitreverse.i32(i32 %ld6)
   %bitreverse7 = call i32 @llvm.bitreverse.i32(i32 %ld7)
-  store i32 %bitreverse0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
-  store i32 %bitreverse1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
-  store i32 %bitreverse2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
-  store i32 %bitreverse3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
-  store i32 %bitreverse4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
-  store i32 %bitreverse5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
-  store i32 %bitreverse6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
-  store i32 %bitreverse7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+  store i32 %bitreverse0, ptr @dst32, align 2
+  store i32 %bitreverse1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
+  store i32 %bitreverse2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
+  store i32 %bitreverse3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
+  store i32 %bitreverse4, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
+  store i32 %bitreverse5, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
+  store i32 %bitreverse6, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
+  store i32 %bitreverse7, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
   ret void
 }
 
 define void @bitreverse_8i16() #0 {
 ; CHECK-LABEL: @bitreverse_8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> [[TMP1]])
-; CHECK-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
-  %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
-  %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
-  %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
-  %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
-  %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
-  %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
-  %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+  %ld0 = load i16, ptr @src16, align 2
+  %ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
+  %ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
+  %ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
+  %ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
+  %ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
+  %ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
+  %ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
   %bitreverse0 = call i16 @llvm.bitreverse.i16(i16 %ld0)
   %bitreverse1 = call i16 @llvm.bitreverse.i16(i16 %ld1)
   %bitreverse2 = call i16 @llvm.bitreverse.i16(i16 %ld2)
@@ -168,55 +168,55 @@ define void @bitreverse_8i16() #0 {
   %bitreverse5 = call i16 @llvm.bitreverse.i16(i16 %ld5)
   %bitreverse6 = call i16 @llvm.bitreverse.i16(i16 %ld6)
   %bitreverse7 = call i16 @llvm.bitreverse.i16(i16 %ld7)
-  store i16 %bitreverse0, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
-  store i16 %bitreverse1, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
-  store i16 %bitreverse2, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
-  store i16 %bitreverse3, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
-  store i16 %bitreverse4, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
-  store i16 %bitreverse5, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
-  store i16 %bitreverse6, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
-  store i16 %bitreverse7, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+  store i16 %bitreverse0, ptr @dst16, align 2
+  store i16 %bitreverse1, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
+  store i16 %bitreverse2, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
+  store i16 %bitreverse3, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
+  store i16 %bitreverse4, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
+  store i16 %bitreverse5, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
+  store i16 %bitreverse6, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
+  store i16 %bitreverse7, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
   ret void
 }
 
 define void @bitreverse_16i16() #0 {
 ; SSE-LABEL: @bitreverse_16i16(
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
 ; SSE-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> [[TMP1]])
-; SSE-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
+; SSE-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
 ; SSE-NEXT:    [[TMP4:%.*]] = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> [[TMP3]])
-; SSE-NEXT:    store <8 x i16> [[TMP4]], <8 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP4]], ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @bitreverse_16i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([16 x i16]* @src16 to <16 x i16>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 2
 ; AVX-NEXT:    [[TMP2:%.*]] = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> [[TMP1]])
-; AVX-NEXT:    store <16 x i16> [[TMP2]], <16 x i16>* bitcast ([16 x i16]* @dst16 to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP2]], ptr @dst16, align 2
 ; AVX-NEXT:    ret void
 ;
 ; XOP-LABEL: @bitreverse_16i16(
-; XOP-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([16 x i16]* @src16 to <16 x i16>*), align 2
+; XOP-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 2
 ; XOP-NEXT:    [[TMP2:%.*]] = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> [[TMP1]])
-; XOP-NEXT:    store <16 x i16> [[TMP2]], <16 x i16>* bitcast ([16 x i16]* @dst16 to <16 x i16>*), align 2
+; XOP-NEXT:    store <16 x i16> [[TMP2]], ptr @dst16, align 2
 ; XOP-NEXT:    ret void
 ;
-  %ld0  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  0), align 2
-  %ld1  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  1), align 2
-  %ld2  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  2), align 2
-  %ld3  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  3), align 2
-  %ld4  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  4), align 2
-  %ld5  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  5), align 2
-  %ld6  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  6), align 2
-  %ld7  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  7), align 2
-  %ld8  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  8), align 2
-  %ld9  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  9), align 2
-  %ld10 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
-  %ld11 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
-  %ld12 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
-  %ld13 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
-  %ld14 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
-  %ld15 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
+  %ld0  = load i16, ptr @src16, align 2
+  %ld1  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  1), align 2
+  %ld2  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  2), align 2
+  %ld3  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  3), align 2
+  %ld4  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  4), align 2
+  %ld5  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  5), align 2
+  %ld6  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  6), align 2
+  %ld7  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  7), align 2
+  %ld8  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  8), align 2
+  %ld9  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  9), align 2
+  %ld10 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 10), align 2
+  %ld11 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 11), align 2
+  %ld12 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 12), align 2
+  %ld13 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 13), align 2
+  %ld14 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 14), align 2
+  %ld15 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 15), align 2
   %bitreverse0  = call i16 @llvm.bitreverse.i16(i16 %ld0)
   %bitreverse1  = call i16 @llvm.bitreverse.i16(i16 %ld1)
   %bitreverse2  = call i16 @llvm.bitreverse.i16(i16 %ld2)
@@ -233,48 +233,48 @@ define void @bitreverse_16i16() #0 {
   %bitreverse13 = call i16 @llvm.bitreverse.i16(i16 %ld13)
   %bitreverse14 = call i16 @llvm.bitreverse.i16(i16 %ld14)
   %bitreverse15 = call i16 @llvm.bitreverse.i16(i16 %ld15)
-  store i16 %bitreverse0 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  0), align 2
-  store i16 %bitreverse1 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  1), align 2
-  store i16 %bitreverse2 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  2), align 2
-  store i16 %bitreverse3 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  3), align 2
-  store i16 %bitreverse4 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  4), align 2
-  store i16 %bitreverse5 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  5), align 2
-  store i16 %bitreverse6 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  6), align 2
-  store i16 %bitreverse7 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  7), align 2
-  store i16 %bitreverse8 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  8), align 2
-  store i16 %bitreverse9 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  9), align 2
-  store i16 %bitreverse10, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
-  store i16 %bitreverse11, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
-  store i16 %bitreverse12, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
-  store i16 %bitreverse13, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
-  store i16 %bitreverse14, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
-  store i16 %bitreverse15, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
+  store i16 %bitreverse0 , ptr @dst16, align 2
+  store i16 %bitreverse1 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  1), align 2
+  store i16 %bitreverse2 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  2), align 2
+  store i16 %bitreverse3 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  3), align 2
+  store i16 %bitreverse4 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  4), align 2
+  store i16 %bitreverse5 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  5), align 2
+  store i16 %bitreverse6 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  6), align 2
+  store i16 %bitreverse7 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  7), align 2
+  store i16 %bitreverse8 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  8), align 2
+  store i16 %bitreverse9 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  9), align 2
+  store i16 %bitreverse10, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 10), align 2
+  store i16 %bitreverse11, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 11), align 2
+  store i16 %bitreverse12, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 12), align 2
+  store i16 %bitreverse13, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 13), align 2
+  store i16 %bitreverse14, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 14), align 2
+  store i16 %bitreverse15, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 15), align 2
   ret void
 }
 
 define void @bitreverse_16i8() #0 {
 ; CHECK-LABEL: @bitreverse_16i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([32 x i8]* @src8 to <16 x i8>*), align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> [[TMP1]])
-; CHECK-NEXT:    store <16 x i8> [[TMP2]], <16 x i8>* bitcast ([32 x i8]* @dst8 to <16 x i8>*), align 1
+; CHECK-NEXT:    store <16 x i8> [[TMP2]], ptr @dst8, align 1
 ; CHECK-NEXT:    ret void
 ;
-  %ld0  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  0), align 1
-  %ld1  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  1), align 1
-  %ld2  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  2), align 1
-  %ld3  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  3), align 1
-  %ld4  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  4), align 1
-  %ld5  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  5), align 1
-  %ld6  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  6), align 1
-  %ld7  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  7), align 1
-  %ld8  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  8), align 1
-  %ld9  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  9), align 1
-  %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
-  %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
-  %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
-  %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
-  %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
-  %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+  %ld0  = load i8, ptr @src8, align 1
+  %ld1  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  1), align 1
+  %ld2  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  2), align 1
+  %ld3  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  3), align 1
+  %ld4  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  4), align 1
+  %ld5  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  5), align 1
+  %ld6  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  6), align 1
+  %ld7  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  7), align 1
+  %ld8  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  8), align 1
+  %ld9  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  9), align 1
+  %ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
+  %ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
+  %ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
+  %ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
+  %ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
+  %ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
   %bitreverse0  = call i8 @llvm.bitreverse.i8(i8 %ld0)
   %bitreverse1  = call i8 @llvm.bitreverse.i8(i8 %ld1)
   %bitreverse2  = call i8 @llvm.bitreverse.i8(i8 %ld2)
@@ -291,79 +291,79 @@ define void @bitreverse_16i8() #0 {
   %bitreverse13 = call i8 @llvm.bitreverse.i8(i8 %ld13)
   %bitreverse14 = call i8 @llvm.bitreverse.i8(i8 %ld14)
   %bitreverse15 = call i8 @llvm.bitreverse.i8(i8 %ld15)
-  store i8 %bitreverse0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  0), align 1
-  store i8 %bitreverse1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  1), align 1
-  store i8 %bitreverse2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  2), align 1
-  store i8 %bitreverse3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  3), align 1
-  store i8 %bitreverse4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  4), align 1
-  store i8 %bitreverse5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  5), align 1
-  store i8 %bitreverse6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  6), align 1
-  store i8 %bitreverse7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  7), align 1
-  store i8 %bitreverse8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  8), align 1
-  store i8 %bitreverse9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  9), align 1
-  store i8 %bitreverse10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
-  store i8 %bitreverse11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
-  store i8 %bitreverse12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
-  store i8 %bitreverse13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
-  store i8 %bitreverse14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
-  store i8 %bitreverse15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+  store i8 %bitreverse0 , ptr @dst8, align 1
+  store i8 %bitreverse1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  1), align 1
+  store i8 %bitreverse2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  2), align 1
+  store i8 %bitreverse3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  3), align 1
+  store i8 %bitreverse4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  4), align 1
+  store i8 %bitreverse5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  5), align 1
+  store i8 %bitreverse6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  6), align 1
+  store i8 %bitreverse7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  7), align 1
+  store i8 %bitreverse8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  8), align 1
+  store i8 %bitreverse9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  9), align 1
+  store i8 %bitreverse10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
+  store i8 %bitreverse11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
+  store i8 %bitreverse12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
+  store i8 %bitreverse13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
+  store i8 %bitreverse14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
+  store i8 %bitreverse15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
   ret void
 }
 
 define void @bitreverse_32i8() #0 {
 ; SSE-LABEL: @bitreverse_32i8(
-; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([32 x i8]* @src8 to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
 ; SSE-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> [[TMP1]])
-; SSE-NEXT:    store <16 x i8> [[TMP2]], <16 x i8>* bitcast ([32 x i8]* @dst8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP2]], ptr @dst8, align 1
+; SSE-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
 ; SSE-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> [[TMP3]])
-; SSE-NEXT:    store <16 x i8> [[TMP4]], <16 x i8>* bitcast (i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP4]], ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @bitreverse_32i8(
-; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([32 x i8]* @src8 to <32 x i8>*), align 1
+; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @src8, align 1
 ; AVX-NEXT:    [[TMP2:%.*]] = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> [[TMP1]])
-; AVX-NEXT:    store <32 x i8> [[TMP2]], <32 x i8>* bitcast ([32 x i8]* @dst8 to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP2]], ptr @dst8, align 1
 ; AVX-NEXT:    ret void
 ;
 ; XOP-LABEL: @bitreverse_32i8(
-; XOP-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([32 x i8]* @src8 to <32 x i8>*), align 1
+; XOP-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @src8, align 1
 ; XOP-NEXT:    [[TMP2:%.*]] = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> [[TMP1]])
-; XOP-NEXT:    store <32 x i8> [[TMP2]], <32 x i8>* bitcast ([32 x i8]* @dst8 to <32 x i8>*), align 1
+; XOP-NEXT:    store <32 x i8> [[TMP2]], ptr @dst8, align 1
 ; XOP-NEXT:    ret void
 ;
-  %ld0  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  0), align 1
-  %ld1  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  1), align 1
-  %ld2  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  2), align 1
-  %ld3  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  3), align 1
-  %ld4  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  4), align 1
-  %ld5  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  5), align 1
-  %ld6  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  6), align 1
-  %ld7  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  7), align 1
-  %ld8  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  8), align 1
-  %ld9  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  9), align 1
-  %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
-  %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
-  %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
-  %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
-  %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
-  %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
-  %ld16 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
-  %ld17 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
-  %ld18 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
-  %ld19 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
-  %ld20 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
-  %ld21 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
-  %ld22 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
-  %ld23 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
-  %ld24 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
-  %ld25 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
-  %ld26 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
-  %ld27 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
-  %ld28 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
-  %ld29 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
-  %ld30 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
-  %ld31 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
+  %ld0  = load i8, ptr @src8, align 1
+  %ld1  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  1), align 1
+  %ld2  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  2), align 1
+  %ld3  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  3), align 1
+  %ld4  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  4), align 1
+  %ld5  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  5), align 1
+  %ld6  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  6), align 1
+  %ld7  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  7), align 1
+  %ld8  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  8), align 1
+  %ld9  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  9), align 1
+  %ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
+  %ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
+  %ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
+  %ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
+  %ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
+  %ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
+  %ld16 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
+  %ld17 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 17), align 1
+  %ld18 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 18), align 1
+  %ld19 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 19), align 1
+  %ld20 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 20), align 1
+  %ld21 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 21), align 1
+  %ld22 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 22), align 1
+  %ld23 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 23), align 1
+  %ld24 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 24), align 1
+  %ld25 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 25), align 1
+  %ld26 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 26), align 1
+  %ld27 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 27), align 1
+  %ld28 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 28), align 1
+  %ld29 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 29), align 1
+  %ld30 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 30), align 1
+  %ld31 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 31), align 1
   %bitreverse0  = call i8 @llvm.bitreverse.i8(i8 %ld0)
   %bitreverse1  = call i8 @llvm.bitreverse.i8(i8 %ld1)
   %bitreverse2  = call i8 @llvm.bitreverse.i8(i8 %ld2)
@@ -396,38 +396,38 @@ define void @bitreverse_32i8() #0 {
   %bitreverse29 = call i8 @llvm.bitreverse.i8(i8 %ld29)
   %bitreverse30 = call i8 @llvm.bitreverse.i8(i8 %ld30)
   %bitreverse31 = call i8 @llvm.bitreverse.i8(i8 %ld31)
-  store i8 %bitreverse0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  0), align 1
-  store i8 %bitreverse1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  1), align 1
-  store i8 %bitreverse2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  2), align 1
-  store i8 %bitreverse3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  3), align 1
-  store i8 %bitreverse4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  4), align 1
-  store i8 %bitreverse5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  5), align 1
-  store i8 %bitreverse6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  6), align 1
-  store i8 %bitreverse7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  7), align 1
-  store i8 %bitreverse8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  8), align 1
-  store i8 %bitreverse9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  9), align 1
-  store i8 %bitreverse10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
-  store i8 %bitreverse11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
-  store i8 %bitreverse12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
-  store i8 %bitreverse13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
-  store i8 %bitreverse14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
-  store i8 %bitreverse15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
-  store i8 %bitreverse16, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
-  store i8 %bitreverse17, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
-  store i8 %bitreverse18, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
-  store i8 %bitreverse19, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
-  store i8 %bitreverse20, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
-  store i8 %bitreverse21, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
-  store i8 %bitreverse22, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
-  store i8 %bitreverse23, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
-  store i8 %bitreverse24, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
-  store i8 %bitreverse25, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
-  store i8 %bitreverse26, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
-  store i8 %bitreverse27, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
-  store i8 %bitreverse28, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
-  store i8 %bitreverse29, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
-  store i8 %bitreverse30, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
-  store i8 %bitreverse31, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
+  store i8 %bitreverse0 , ptr @dst8, align 1
+  store i8 %bitreverse1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  1), align 1
+  store i8 %bitreverse2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  2), align 1
+  store i8 %bitreverse3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  3), align 1
+  store i8 %bitreverse4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  4), align 1
+  store i8 %bitreverse5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  5), align 1
+  store i8 %bitreverse6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  6), align 1
+  store i8 %bitreverse7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  7), align 1
+  store i8 %bitreverse8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  8), align 1
+  store i8 %bitreverse9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  9), align 1
+  store i8 %bitreverse10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
+  store i8 %bitreverse11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
+  store i8 %bitreverse12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
+  store i8 %bitreverse13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
+  store i8 %bitreverse14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
+  store i8 %bitreverse15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
+  store i8 %bitreverse16, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
+  store i8 %bitreverse17, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 17), align 1
+  store i8 %bitreverse18, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 18), align 1
+  store i8 %bitreverse19, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 19), align 1
+  store i8 %bitreverse20, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 20), align 1
+  store i8 %bitreverse21, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 21), align 1
+  store i8 %bitreverse22, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 22), align 1
+  store i8 %bitreverse23, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 23), align 1
+  store i8 %bitreverse24, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 24), align 1
+  store i8 %bitreverse25, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 25), align 1
+  store i8 %bitreverse26, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 26), align 1
+  store i8 %bitreverse27, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 27), align 1
+  store i8 %bitreverse28, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 28), align 1
+  store i8 %bitreverse29, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 29), align 1
+  store i8 %bitreverse30, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 30), align 1
+  store i8 %bitreverse31, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 31), align 1
   ret void
 }
 

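All of the hunks above apply the same mechanical rewrite: the pointee type moves out of the pointer type (so i8* and [32 x i8]* both become ptr), and a getelementptr constant expression whose indices are all zero folds away entirely, leaving the access addressed through the global itself. A minimal before/after sketch (illustrative IR with a hypothetical global @g, not taken from any test above):

  ; typed pointers
  %v = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @g, i64 0, i64 1), align 1
  store i8 %v, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @g, i64 0, i64 0), align 1

  ; opaque pointers: same GEP with ptr operands; the all-zero-index GEP folds to @g
  %v = load i8, ptr getelementptr inbounds ([32 x i8], ptr @g, i64 0, i64 1), align 1
  store i8 %v, ptr @g, align 1
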
diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll
index 4cc068a6d1665..889f5a95c81d6 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll
@@ -1,15 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s
 
-define void @test(i32* %0, i32* %1, i32* %2) {
+define void @test(ptr %0, ptr %1, ptr %2) {
 ; CHECK-LABEL: @test(
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 4
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[TMP1:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 4
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 4
+; CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i32>, ptr [[TMP1:%.*]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[TMP11:%.*]] = sub <4 x i32> <i32 0, i32 0, i32 undef, i32 0>, [[TMP8]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = sub <4 x i32> [[TMP11]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[TMP6]]
@@ -22,32 +19,31 @@ define void @test(i32* %0, i32* %1, i32* %2) {
 ; CHECK-NEXT:    [[TMP20:%.*]] = add <4 x i32> [[TMP19]], zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = sub <4 x i32> [[TMP19]], zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP20]], <4 x i32> [[TMP21]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
-; CHECK-NEXT:    [[TMP23:%.*]] = bitcast i32* [[TMP2:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP22]], <4 x i32>* [[TMP23]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP22]], ptr [[TMP2:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %4 = load i32, i32* %1, align 4
-  %5 = load i32, i32* %0, align 4
-  %6 = getelementptr inbounds i32, i32* %0, i64 4
-  %7 = load i32, i32* %6, align 4
-  %8 = getelementptr inbounds i32, i32* %1, i64 1
-  %9 = load i32, i32* %8, align 4
-  %10 = getelementptr inbounds i32, i32* %0, i64 1
-  %11 = load i32, i32* %10, align 4
-  %12 = getelementptr inbounds i32, i32* %0, i64 5
-  %13 = load i32, i32* %12, align 4
-  %14 = getelementptr inbounds i32, i32* %1, i64 2
-  %15 = load i32, i32* %14, align 4
-  %16 = getelementptr inbounds i32, i32* %0, i64 2
-  %17 = load i32, i32* %16, align 4
-  %18 = getelementptr inbounds i32, i32* %0, i64 6
-  %19 = load i32, i32* %18, align 4
-  %20 = getelementptr inbounds i32, i32* %1, i64 3
-  %21 = load i32, i32* %20, align 4
-  %22 = getelementptr inbounds i32, i32* %0, i64 3
-  %23 = load i32, i32* %22, align 4
-  %24 = getelementptr inbounds i32, i32* %0, i64 7
-  %25 = load i32, i32* %24, align 4
+  %4 = load i32, ptr %1, align 4
+  %5 = load i32, ptr %0, align 4
+  %6 = getelementptr inbounds i32, ptr %0, i64 4
+  %7 = load i32, ptr %6, align 4
+  %8 = getelementptr inbounds i32, ptr %1, i64 1
+  %9 = load i32, ptr %8, align 4
+  %10 = getelementptr inbounds i32, ptr %0, i64 1
+  %11 = load i32, ptr %10, align 4
+  %12 = getelementptr inbounds i32, ptr %0, i64 5
+  %13 = load i32, ptr %12, align 4
+  %14 = getelementptr inbounds i32, ptr %1, i64 2
+  %15 = load i32, ptr %14, align 4
+  %16 = getelementptr inbounds i32, ptr %0, i64 2
+  %17 = load i32, ptr %16, align 4
+  %18 = getelementptr inbounds i32, ptr %0, i64 6
+  %19 = load i32, ptr %18, align 4
+  %20 = getelementptr inbounds i32, ptr %1, i64 3
+  %21 = load i32, ptr %20, align 4
+  %22 = getelementptr inbounds i32, ptr %0, i64 3
+  %23 = load i32, ptr %22, align 4
+  %24 = getelementptr inbounds i32, ptr %0, i64 7
+  %25 = load i32, ptr %24, align 4
   %26 = sub i32 0, %23
   %27 = sub i32 %26, %25
   %28 = add i32 %27, %21
@@ -63,21 +59,21 @@ define void @test(i32* %0, i32* %1, i32* %2) {
   %38 = add i32 %31, 1
   %39 = add i32 %38, 0
   %40 = add i32 %39, 0
-  store i32 %40, i32* %2, align 4
-  %41 = getelementptr inbounds i32, i32* %2, i64 2
+  store i32 %40, ptr %2, align 4
+  %41 = getelementptr inbounds i32, ptr %2, i64 2
   %42 = add i32 0, %34
   %43 = sub i32 %42, 0
   %44 = sub i32 %43, 0
-  store i32 %44, i32* %41, align 4
-  %45 = getelementptr inbounds i32, i32* %2, i64 1
+  store i32 %44, ptr %41, align 4
+  %45 = getelementptr inbounds i32, ptr %2, i64 1
   %46 = add i32 %37, 0
   %47 = sub i32 %46, 0
   %48 = sub i32 %47, 0
-  store i32 %48, i32* %45, align 4
-  %49 = getelementptr inbounds i32, i32* %2, i64 3
+  store i32 %48, ptr %45, align 4
+  %49 = getelementptr inbounds i32, ptr %2, i64 3
   %50 = sub i32 %28, 0
   %51 = sub i32 %50, 0
   %52 = add i32 %51, 0
-  store i32 %52, i32* %49, align 4
+  store i32 %52, ptr %49, align 4
   ret void
 }

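For tests operating on function arguments, like bottom-to-top-reorder.ll above, the conversion also deletes the bitcast instructions the vectorizer used to emit to reinterpret a scalar pointer as a vector pointer: with opaque pointers a <4 x i32> load or store can use the incoming ptr directly. That is also why the autogenerated [[TMP...]] numbering now has gaps (TMP5/TMP7/TMP9 are gone, while the surviving values keep their old names). A sketch of the shrinkage, with a hypothetical %arg:

  ; before: a bitcast feeding each vector access
  %p = bitcast i32* %arg to <4 x i32>*
  %v = load <4 x i32>, <4 x i32>* %p, align 4

  ; after: the load addresses %arg directly
  %v = load <4 x i32>, ptr %arg, align 4
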
diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll
index c7fbf2ccb2c83..dc5900d739fed 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll
@@ -10,26 +10,24 @@
 ; We broadcast %v1 and %v2
 ;
 
-define void @bcast_vals(i64 *%A, i64 *%B, i64 *%S) {
+define void @bcast_vals(ptr %A, ptr %B, ptr %S) {
 ; CHECK-LABEL: @bcast_vals(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A0:%.*]] = load i64, i64* [[A:%.*]], align 8
-; CHECK-NEXT:    [[B0:%.*]] = load i64, i64* [[B:%.*]], align 8
+; CHECK-NEXT:    [[A0:%.*]] = load i64, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[B0:%.*]] = load i64, ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    [[V1:%.*]] = sub i64 [[A0]], 1
 ; CHECK-NEXT:    [[V2:%.*]] = sub i64 [[B0]], 1
-; CHECK-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds i64, i64* [[S:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[V1]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[V2]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i64> [[SHUFFLE]], [[SHUFFLE1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[IDXS0]] to <4 x i64>*
-; CHECK-NEXT:    store <4 x i64> [[TMP2]], <4 x i64>* [[TMP3]], align 8
+; CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[S:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %A0 = load i64, i64 *%A, align 8
-  %B0 = load i64, i64 *%B, align 8
+  %A0 = load i64, ptr %A, align 8
+  %B0 = load i64, ptr %B, align 8
 
   %v1 = sub i64 %A0, 1
   %v2 = sub i64 %B0, 1
@@ -39,15 +37,14 @@ entry:
   %Add2 = add i64 %v2, %v1
   %Add3 = add i64 %v1, %v2
 
-  %idxS0 = getelementptr inbounds i64, i64* %S, i64 0
-  %idxS1 = getelementptr inbounds i64, i64* %S, i64 1
-  %idxS2 = getelementptr inbounds i64, i64* %S, i64 2
-  %idxS3 = getelementptr inbounds i64, i64* %S, i64 3
+  %idxS1 = getelementptr inbounds i64, ptr %S, i64 1
+  %idxS2 = getelementptr inbounds i64, ptr %S, i64 2
+  %idxS3 = getelementptr inbounds i64, ptr %S, i64 3
 
-  store i64 %Add0, i64 *%idxS0, align 8
-  store i64 %Add1, i64 *%idxS1, align 8
-  store i64 %Add2, i64 *%idxS2, align 8
-  store i64 %Add3, i64 *%idxS3, align 8
+  store i64 %Add0, ptr %S, align 8
+  store i64 %Add1, ptr %idxS1, align 8
+  store i64 %Add2, ptr %idxS2, align 8
+  store i64 %Add3, ptr %idxS3, align 8
   ret void
 }
 
@@ -59,16 +56,15 @@ entry:
 ; We broadcast %v1.
 
 ;
-define void @bcast_vals2(i16 *%A, i16 *%B, i16 *%C, i16 *%D, i16 *%E, i32 *%S) {
+define void @bcast_vals2(ptr %A, ptr %B, ptr %C, ptr %D, ptr %E, ptr %S) {
 ; CHECK-LABEL: @bcast_vals2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A0:%.*]] = load i16, i16* [[A:%.*]], align 8
+; CHECK-NEXT:    [[A0:%.*]] = load i16, ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    [[V1:%.*]] = sext i16 [[A0]] to i32
-; CHECK-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i64 0
-; CHECK-NEXT:    [[B0:%.*]] = load i16, i16* [[B:%.*]], align 8
-; CHECK-NEXT:    [[C0:%.*]] = load i16, i16* [[C:%.*]], align 8
-; CHECK-NEXT:    [[D0:%.*]] = load i16, i16* [[D:%.*]], align 8
-; CHECK-NEXT:    [[E0:%.*]] = load i16, i16* [[E:%.*]], align 8
+; CHECK-NEXT:    [[B0:%.*]] = load i16, ptr [[B:%.*]], align 8
+; CHECK-NEXT:    [[C0:%.*]] = load i16, ptr [[C:%.*]], align 8
+; CHECK-NEXT:    [[D0:%.*]] = load i16, ptr [[D:%.*]], align 8
+; CHECK-NEXT:    [[E0:%.*]] = load i16, ptr [[E:%.*]], align 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[B0]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> [[TMP0]], i16 [[C0]], i32 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[E0]], i32 2
@@ -77,16 +73,15 @@ define void @bcast_vals2(i16 *%A, i16 *%B, i16 *%C, i16 *%D, i16 *%E, i32 *%S) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[V1]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i32> [[SHUFFLE]], [[TMP4]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[IDXS0]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 8
+; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr [[S:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %A0 = load i16, i16 *%A, align 8
-  %B0 = load i16, i16 *%B, align 8
-  %C0 = load i16, i16 *%C, align 8
-  %D0 = load i16, i16 *%D, align 8
-  %E0 = load i16, i16 *%E, align 8
+  %A0 = load i16, ptr %A, align 8
+  %B0 = load i16, ptr %B, align 8
+  %C0 = load i16, ptr %C, align 8
+  %D0 = load i16, ptr %D, align 8
+  %E0 = load i16, ptr %E, align 8
 
   %v1 = sext i16 %A0 to i32
   %v2 = sext i16 %B0 to i32
@@ -99,14 +94,13 @@ entry:
   %Add2 = add i32 %v5, %v1
   %Add3 = add i32 %v1, %v4
 
-  %idxS0 = getelementptr inbounds i32, i32* %S, i64 0
-  %idxS1 = getelementptr inbounds i32, i32* %S, i64 1
-  %idxS2 = getelementptr inbounds i32, i32* %S, i64 2
-  %idxS3 = getelementptr inbounds i32, i32* %S, i64 3
+  %idxS1 = getelementptr inbounds i32, ptr %S, i64 1
+  %idxS2 = getelementptr inbounds i32, ptr %S, i64 2
+  %idxS3 = getelementptr inbounds i32, ptr %S, i64 3
 
-  store i32 %Add0, i32 *%idxS0, align 8
-  store i32 %Add1, i32 *%idxS1, align 8
-  store i32 %Add2, i32 *%idxS2, align 8
-  store i32 %Add3, i32 *%idxS3, align 8
+  store i32 %Add0, ptr %S, align 8
+  store i32 %Add1, ptr %idxS1, align 8
+  store i32 %Add2, ptr %idxS2, align 8
+  store i32 %Add3, ptr %idxS3, align 8
   ret void
 }

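broadcast.ll shows the zero-offset cleanup on GEP instructions rather than constant expressions: getelementptr inbounds i64, ptr %S, i64 0 is just %S, so %idxS0 is deleted and both the scalar store in the input and the vectorized store in the CHECK lines address %S directly. An illustrative sketch:

  ; before
  %idx0 = getelementptr inbounds i64, i64* %S, i64 0
  store i64 %x, i64* %idx0, align 8

  ; after: the redundant GEP is gone
  store i64 %x, ptr %S, align 8
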
diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll b/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll
index e3e6910b931ee..a5c953d48a555 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll
@@ -12,37 +12,34 @@
 ; YAML-NEXT:  - String:          ' and with tree size '
 ; YAML-NEXT:  - TreeSize:        '2'
 
-define void @bcast_long(i32 *%A, i32 *%S) {
+define void @bcast_long(ptr %A, ptr %S) {
 ; CHECK-LABEL: @bcast_long(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A0:%.*]] = load i32, i32* [[A:%.*]], align 8
-; CHECK-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i64 0
+; CHECK-NEXT:    [[A0:%.*]] = load i32, ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <8 x i32> poison, i32 [[A0]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> <i32 0, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0>
 ; CHECK-NEXT:    [[TMP1:%.*]] = freeze <8 x i32> [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[IDXS0]] to <8 x i32>*
-; CHECK-NEXT:    store <8 x i32> [[TMP1]], <8 x i32>* [[TMP2]], align 8
+; CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[S:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %A0 = load i32, i32 *%A, align 8
+  %A0 = load i32, ptr %A, align 8
 
-  %idxS0 = getelementptr inbounds i32, i32* %S, i64 0
-  %idxS1 = getelementptr inbounds i32, i32* %S, i64 1
-  %idxS2 = getelementptr inbounds i32, i32* %S, i64 2
-  %idxS3 = getelementptr inbounds i32, i32* %S, i64 3
-  %idxS4 = getelementptr inbounds i32, i32* %S, i64 4
-  %idxS5 = getelementptr inbounds i32, i32* %S, i64 5
-  %idxS6 = getelementptr inbounds i32, i32* %S, i64 6
-  %idxS7 = getelementptr inbounds i32, i32* %S, i64 7
+  %idxS1 = getelementptr inbounds i32, ptr %S, i64 1
+  %idxS2 = getelementptr inbounds i32, ptr %S, i64 2
+  %idxS3 = getelementptr inbounds i32, ptr %S, i64 3
+  %idxS4 = getelementptr inbounds i32, ptr %S, i64 4
+  %idxS5 = getelementptr inbounds i32, ptr %S, i64 5
+  %idxS6 = getelementptr inbounds i32, ptr %S, i64 6
+  %idxS7 = getelementptr inbounds i32, ptr %S, i64 7
 
-  store i32 %A0, i32 *%idxS0, align 8
-  store i32 %A0, i32 *%idxS1, align 8
-  store i32 undef, i32 *%idxS2, align 8
-  store i32 %A0, i32 *%idxS3, align 8
-  store i32 %A0, i32 *%idxS4, align 8
-  store i32 %A0, i32 *%idxS5, align 8
-  store i32 %A0, i32 *%idxS6, align 8
-  store i32 %A0, i32 *%idxS7, align 8
+  store i32 %A0, ptr %S, align 8
+  store i32 %A0, ptr %idxS1, align 8
+  store i32 undef, ptr %idxS2, align 8
+  store i32 %A0, ptr %idxS3, align 8
+  store i32 %A0, ptr %idxS4, align 8
+  store i32 %A0, ptr %idxS5, align 8
+  store i32 %A0, ptr %idxS6, align 8
+  store i32 %A0, ptr %idxS7, align 8
   ret void
 }

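Since these tests carry autogenerated assertions (note the update_test_checks.py headers), the CHECK lines after a rewrite like this are best refreshed by rerunning the script against a locally built opt rather than edited by hand; something like the following, where the build path is illustrative:

  llvm/utils/update_test_checks.py --opt-binary build/bin/opt llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll

The YAML remark checks at the top of broadcast_long.ll are not produced by that script, so they still need a manual pass.
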
diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/bswap.ll b/llvm/test/Transforms/SLPVectorizer/X86/bswap.ll
index f7964b4ddcdd3..5d4b1f08405bc 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/bswap.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bswap.ll
@@ -18,112 +18,112 @@ declare i16 @llvm.bswap.i16(i16)
 
 define void @bswap_2i64() #0 {
 ; SSE-LABEL: @bswap_2i64(
-; SSE-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-; SSE-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 8
+; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
 ; SSE-NEXT:    [[BSWAP0:%.*]] = call i64 @llvm.bswap.i64(i64 [[LD0]])
 ; SSE-NEXT:    [[BSWAP1:%.*]] = call i64 @llvm.bswap.i64(i64 [[LD1]])
-; SSE-NEXT:    store i64 [[BSWAP0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-; SSE-NEXT:    store i64 [[BSWAP1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; SSE-NEXT:    store i64 [[BSWAP0]], ptr @dst64, align 8
+; SSE-NEXT:    store i64 [[BSWAP1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @bswap_2i64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 8
 ; AVX-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP1]])
-; AVX-NEXT:    store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 8
+; AVX-NEXT:    store <2 x i64> [[TMP2]], ptr @dst64, align 8
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-  %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+  %ld0 = load i64, ptr @src64, align 8
+  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
   %bswap0 = call i64 @llvm.bswap.i64(i64 %ld0)
   %bswap1 = call i64 @llvm.bswap.i64(i64 %ld1)
-  store i64 %bswap0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-  store i64 %bswap1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+  store i64 %bswap0, ptr @dst64, align 8
+  store i64 %bswap1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @bswap_4i64() #0 {
 ; SSE-LABEL: @bswap_4i64(
-; SSE-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
-; SSE-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
-; SSE-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
-; SSE-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 4
+; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
+; SSE-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
+; SSE-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
 ; SSE-NEXT:    [[BSWAP0:%.*]] = call i64 @llvm.bswap.i64(i64 [[LD0]])
 ; SSE-NEXT:    [[BSWAP1:%.*]] = call i64 @llvm.bswap.i64(i64 [[LD1]])
 ; SSE-NEXT:    [[BSWAP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[LD2]])
 ; SSE-NEXT:    [[BSWAP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[LD3]])
-; SSE-NEXT:    store i64 [[BSWAP0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
-; SSE-NEXT:    store i64 [[BSWAP1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
-; SSE-NEXT:    store i64 [[BSWAP2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
-; SSE-NEXT:    store i64 [[BSWAP3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+; SSE-NEXT:    store i64 [[BSWAP0]], ptr @dst64, align 4
+; SSE-NEXT:    store i64 [[BSWAP1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
+; SSE-NEXT:    store i64 [[BSWAP2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
+; SSE-NEXT:    store i64 [[BSWAP3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @bswap_4i64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([4 x i64]* @src64 to <4 x i64>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> [[TMP1]])
-; AVX-NEXT:    store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([4 x i64]* @dst64 to <4 x i64>*), align 4
+; AVX-NEXT:    store <4 x i64> [[TMP2]], ptr @dst64, align 4
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
-  %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
-  %ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
-  %ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+  %ld0 = load i64, ptr @src64, align 4
+  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
+  %ld2 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
+  %ld3 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
   %bswap0 = call i64 @llvm.bswap.i64(i64 %ld0)
   %bswap1 = call i64 @llvm.bswap.i64(i64 %ld1)
   %bswap2 = call i64 @llvm.bswap.i64(i64 %ld2)
   %bswap3 = call i64 @llvm.bswap.i64(i64 %ld3)
-  store i64 %bswap0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
-  store i64 %bswap1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
-  store i64 %bswap2, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
-  store i64 %bswap3, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+  store i64 %bswap0, ptr @dst64, align 4
+  store i64 %bswap1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
+  store i64 %bswap2, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
+  store i64 %bswap3, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
   ret void
 }
 
 define void @bswap_4i32() #0 {
 ; CHECK-LABEL: @bswap_4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[TMP1]])
-; CHECK-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 4
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
-  %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
-  %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
-  %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+  %ld0 = load i32, ptr @src32, align 4
+  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
+  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
   %bswap0 = call i32 @llvm.bswap.i32(i32 %ld0)
   %bswap1 = call i32 @llvm.bswap.i32(i32 %ld1)
   %bswap2 = call i32 @llvm.bswap.i32(i32 %ld2)
   %bswap3 = call i32 @llvm.bswap.i32(i32 %ld3)
-  store i32 %bswap0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
-  store i32 %bswap1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
-  store i32 %bswap2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
-  store i32 %bswap3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+  store i32 %bswap0, ptr @dst32, align 4
+  store i32 %bswap1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
+  store i32 %bswap2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
+  store i32 %bswap3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @bswap_8i32() #0 {
 ; SSE-LABEL: @bswap_8i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 2
 ; SSE-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[TMP1]])
-; SSE-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 2
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 2
+; SSE-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 2
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
 ; SSE-NEXT:    [[TMP4:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[TMP3]])
-; SSE-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 2
+; SSE-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @bswap_8i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
 ; AVX-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> [[TMP1]])
-; AVX-NEXT:    store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
+; AVX-NEXT:    store <8 x i32> [[TMP2]], ptr @dst32, align 2
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
-  %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
-  %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
-  %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
-  %ld4 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
-  %ld5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
-  %ld6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
-  %ld7 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+  %ld0 = load i32, ptr @src32, align 2
+  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
+  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
+  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
+  %ld4 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
+  %ld5 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
+  %ld6 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
+  %ld7 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
   %bswap0 = call i32 @llvm.bswap.i32(i32 %ld0)
   %bswap1 = call i32 @llvm.bswap.i32(i32 %ld1)
   %bswap2 = call i32 @llvm.bswap.i32(i32 %ld2)
@@ -132,32 +132,32 @@ define void @bswap_8i32() #0 {
   %bswap5 = call i32 @llvm.bswap.i32(i32 %ld5)
   %bswap6 = call i32 @llvm.bswap.i32(i32 %ld6)
   %bswap7 = call i32 @llvm.bswap.i32(i32 %ld7)
-  store i32 %bswap0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
-  store i32 %bswap1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
-  store i32 %bswap2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
-  store i32 %bswap3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
-  store i32 %bswap4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
-  store i32 %bswap5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
-  store i32 %bswap6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
-  store i32 %bswap7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+  store i32 %bswap0, ptr @dst32, align 2
+  store i32 %bswap1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
+  store i32 %bswap2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
+  store i32 %bswap3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
+  store i32 %bswap4, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
+  store i32 %bswap5, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
+  store i32 %bswap6, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
+  store i32 %bswap7, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
   ret void
 }
 
 define void @bswap_8i16() #0 {
 ; CHECK-LABEL: @bswap_8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> [[TMP1]])
-; CHECK-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
-  %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
-  %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
-  %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
-  %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
-  %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
-  %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
-  %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+  %ld0 = load i16, ptr @src16, align 2
+  %ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
+  %ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
+  %ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
+  %ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
+  %ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
+  %ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
+  %ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
   %bswap0 = call i16 @llvm.bswap.i16(i16 %ld0)
   %bswap1 = call i16 @llvm.bswap.i16(i16 %ld1)
   %bswap2 = call i16 @llvm.bswap.i16(i16 %ld2)
@@ -166,49 +166,49 @@ define void @bswap_8i16() #0 {
   %bswap5 = call i16 @llvm.bswap.i16(i16 %ld5)
   %bswap6 = call i16 @llvm.bswap.i16(i16 %ld6)
   %bswap7 = call i16 @llvm.bswap.i16(i16 %ld7)
-  store i16 %bswap0, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
-  store i16 %bswap1, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
-  store i16 %bswap2, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
-  store i16 %bswap3, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
-  store i16 %bswap4, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
-  store i16 %bswap5, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
-  store i16 %bswap6, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
-  store i16 %bswap7, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+  store i16 %bswap0, ptr @dst16, align 2
+  store i16 %bswap1, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
+  store i16 %bswap2, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
+  store i16 %bswap3, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
+  store i16 %bswap4, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
+  store i16 %bswap5, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
+  store i16 %bswap6, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
+  store i16 %bswap7, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
   ret void
 }
 
 define void @bswap_16i16() #0 {
 ; SSE-LABEL: @bswap_16i16(
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
 ; SSE-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> [[TMP1]])
-; SSE-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
+; SSE-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
 ; SSE-NEXT:    [[TMP4:%.*]] = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> [[TMP3]])
-; SSE-NEXT:    store <8 x i16> [[TMP4]], <8 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP4]], ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @bswap_16i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([16 x i16]* @src16 to <16 x i16>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 2
 ; AVX-NEXT:    [[TMP2:%.*]] = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> [[TMP1]])
-; AVX-NEXT:    store <16 x i16> [[TMP2]], <16 x i16>* bitcast ([16 x i16]* @dst16 to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP2]], ptr @dst16, align 2
 ; AVX-NEXT:    ret void
 ;
-  %ld0  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  0), align 2
-  %ld1  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  1), align 2
-  %ld2  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  2), align 2
-  %ld3  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  3), align 2
-  %ld4  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  4), align 2
-  %ld5  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  5), align 2
-  %ld6  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  6), align 2
-  %ld7  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  7), align 2
-  %ld8  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  8), align 2
-  %ld9  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  9), align 2
-  %ld10 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
-  %ld11 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
-  %ld12 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
-  %ld13 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
-  %ld14 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
-  %ld15 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
+  %ld0  = load i16, ptr @src16, align 2
+  %ld1  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  1), align 2
+  %ld2  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  2), align 2
+  %ld3  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  3), align 2
+  %ld4  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  4), align 2
+  %ld5  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  5), align 2
+  %ld6  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  6), align 2
+  %ld7  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  7), align 2
+  %ld8  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  8), align 2
+  %ld9  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  9), align 2
+  %ld10 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 10), align 2
+  %ld11 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 11), align 2
+  %ld12 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 12), align 2
+  %ld13 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 13), align 2
+  %ld14 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 14), align 2
+  %ld15 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 15), align 2
   %bswap0  = call i16 @llvm.bswap.i16(i16 %ld0)
   %bswap1  = call i16 @llvm.bswap.i16(i16 %ld1)
   %bswap2  = call i16 @llvm.bswap.i16(i16 %ld2)
@@ -225,22 +225,22 @@ define void @bswap_16i16() #0 {
   %bswap13 = call i16 @llvm.bswap.i16(i16 %ld13)
   %bswap14 = call i16 @llvm.bswap.i16(i16 %ld14)
   %bswap15 = call i16 @llvm.bswap.i16(i16 %ld15)
-  store i16 %bswap0 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  0), align 2
-  store i16 %bswap1 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  1), align 2
-  store i16 %bswap2 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  2), align 2
-  store i16 %bswap3 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  3), align 2
-  store i16 %bswap4 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  4), align 2
-  store i16 %bswap5 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  5), align 2
-  store i16 %bswap6 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  6), align 2
-  store i16 %bswap7 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  7), align 2
-  store i16 %bswap8 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  8), align 2
-  store i16 %bswap9 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  9), align 2
-  store i16 %bswap10, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
-  store i16 %bswap11, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
-  store i16 %bswap12, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
-  store i16 %bswap13, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
-  store i16 %bswap14, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
-  store i16 %bswap15, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
+  store i16 %bswap0 , ptr @dst16, align 2
+  store i16 %bswap1 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  1), align 2
+  store i16 %bswap2 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  2), align 2
+  store i16 %bswap3 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  3), align 2
+  store i16 %bswap4 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  4), align 2
+  store i16 %bswap5 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  5), align 2
+  store i16 %bswap6 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  6), align 2
+  store i16 %bswap7 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  7), align 2
+  store i16 %bswap8 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  8), align 2
+  store i16 %bswap9 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  9), align 2
+  store i16 %bswap10, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 10), align 2
+  store i16 %bswap11, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 11), align 2
+  store i16 %bswap12, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 12), align 2
+  store i16 %bswap13, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 13), align 2
+  store i16 %bswap14, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 14), align 2
+  store i16 %bswap15, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 15), align 2
   ret void
 }
 

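bswap.ll rounds out the pattern with the constant-expression form of the bitcast cleanup: reinterpreting a typed global as a vector pointer previously required a bitcast constant expression, which opaque pointers make a no-op, so the whole expression collapses to the bare global. Illustrative before/after with a hypothetical @g:

  ; before
  %v = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @g to <2 x i64>*), align 8

  ; after
  %v = load <2 x i64>, ptr @g, align 8
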
diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/call.ll b/llvm/test/Transforms/SLPVectorizer/X86/call.ll
index 5faba676b1d6c..4181148a4d829 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/call.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/call.ll
@@ -12,106 +12,96 @@ declare double @sqrt(double) nounwind willreturn
 declare i64 @round(i64) nounwind willreturn
 
 
-define void @sin_libm(double* %a, double* %b) {
+define void @sin_libm(ptr %a, ptr %b) {
 ; CHECK-LABEL: @sin_libm(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.sin.v2f64(<2 x double> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load double, double* %a, align 8
-  %idx1 = getelementptr inbounds double, double* %a, i64 1
-  %a1 = load double, double* %idx1, align 8
+  %a0 = load double, ptr %a, align 8
+  %idx1 = getelementptr inbounds double, ptr %a, i64 1
+  %a1 = load double, ptr %idx1, align 8
   %sin1 = tail call double @sin(double %a0) nounwind readnone
   %sin2 = tail call double @sin(double %a1) nounwind readnone
-  store double %sin1, double* %b, align 8
-  %idx2 = getelementptr inbounds double, double* %b, i64 1
-  store double %sin2, double* %idx2, align 8
+  store double %sin1, ptr %b, align 8
+  %idx2 = getelementptr inbounds double, ptr %b, i64 1
+  store double %sin2, ptr %idx2, align 8
   ret void
 }
 
-define void @cos_libm(double* %a, double* %b) {
+define void @cos_libm(ptr %a, ptr %b) {
 ; CHECK-LABEL: @cos_libm(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.cos.v2f64(<2 x double> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load double, double* %a, align 8
-  %idx1 = getelementptr inbounds double, double* %a, i64 1
-  %a1 = load double, double* %idx1, align 8
+  %a0 = load double, ptr %a, align 8
+  %idx1 = getelementptr inbounds double, ptr %a, i64 1
+  %a1 = load double, ptr %idx1, align 8
   %cos1 = tail call double @cos(double %a0) nounwind readnone
   %cos2 = tail call double @cos(double %a1) nounwind readnone
-  store double %cos1, double* %b, align 8
-  %idx2 = getelementptr inbounds double, double* %b, i64 1
-  store double %cos2, double* %idx2, align 8
+  store double %cos1, ptr %b, align 8
+  %idx2 = getelementptr inbounds double, ptr %b, i64 1
+  store double %cos2, ptr %idx2, align 8
   ret void
 }
 
-define void @pow_libm(double* %a, double* %b) {
+define void @pow_libm(ptr %a, ptr %b) {
 ; CHECK-LABEL: @pow_libm(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[TMP2]], <2 x double> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load double, double* %a, align 8
-  %idx1 = getelementptr inbounds double, double* %a, i64 1
-  %a1 = load double, double* %idx1, align 8
+  %a0 = load double, ptr %a, align 8
+  %idx1 = getelementptr inbounds double, ptr %a, i64 1
+  %a1 = load double, ptr %idx1, align 8
   %pow1 = tail call double @pow(double %a0, double %a0) nounwind readnone
   %pow2 = tail call double @pow(double %a1, double %a1) nounwind readnone
-  store double %pow1, double* %b, align 8
-  %idx2 = getelementptr inbounds double, double* %b, i64 1
-  store double %pow2, double* %idx2, align 8
+  store double %pow1, ptr %b, align 8
+  %idx2 = getelementptr inbounds double, ptr %b, i64 1
+  store double %pow2, ptr %idx2, align 8
   ret void
 }
 
-define void @exp_libm(double* %a, double* %b) {
+define void @exp_libm(ptr %a, ptr %b) {
 ; CHECK-LABEL: @exp_libm(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.exp2.v2f64(<2 x double> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load double, double* %a, align 8
-  %idx1 = getelementptr inbounds double, double* %a, i64 1
-  %a1 = load double, double* %idx1, align 8
+  %a0 = load double, ptr %a, align 8
+  %idx1 = getelementptr inbounds double, ptr %a, i64 1
+  %a1 = load double, ptr %idx1, align 8
   %exp1 = tail call double @exp2(double %a0) nounwind readnone
   %exp2 = tail call double @exp2(double %a1) nounwind readnone
-  store double %exp1, double* %b, align 8
-  %idx2 = getelementptr inbounds double, double* %b, i64 1
-  store double %exp2, double* %idx2, align 8
+  store double %exp1, ptr %b, align 8
+  %idx2 = getelementptr inbounds double, ptr %b, i64 1
+  store double %exp2, ptr %idx2, align 8
   ret void
 }
 
 ; No fast-math-flags are required to convert sqrt library calls to an intrinsic.
 ; We just need to know that errno is not set (readnone).
 
-define void @sqrt_libm_no_errno(double* %a, double* %b) {
+define void @sqrt_libm_no_errno(ptr %a, ptr %b) {
 ; CHECK-LABEL: @sqrt_libm_no_errno(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load double, double* %a, align 8
-  %idx1 = getelementptr inbounds double, double* %a, i64 1
-  %a1 = load double, double* %idx1, align 8
+  %a0 = load double, ptr %a, align 8
+  %idx1 = getelementptr inbounds double, ptr %a, i64 1
+  %a1 = load double, ptr %idx1, align 8
   %sqrt1 = tail call double @sqrt(double %a0) nounwind readnone
   %sqrt2 = tail call double @sqrt(double %a1) nounwind readnone
-  store double %sqrt1, double* %b, align 8
-  %idx2 = getelementptr inbounds double, double* %b, i64 1
-  store double %sqrt2, double* %idx2, align 8
+  store double %sqrt1, ptr %b, align 8
+  %idx2 = getelementptr inbounds double, ptr %b, i64 1
+  store double %sqrt2, ptr %idx2, align 8
   ret void
 }
 
@@ -120,50 +110,50 @@ define void @sqrt_libm_no_errno(double* %a, double* %b) {
 ; input would result in a nan output ("On a domain error, the function returns an
 ; implementation-defined value.")
 
-define void @sqrt_libm_errno(double* %a, double* %b) {
+define void @sqrt_libm_errno(ptr %a, ptr %b) {
 ; CHECK-LABEL: @sqrt_libm_errno(
-; CHECK-NEXT:    [[A0:%.*]] = load double, double* [[A:%.*]], align 8
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
-; CHECK-NEXT:    [[A1:%.*]] = load double, double* [[IDX1]], align 8
+; CHECK-NEXT:    [[A0:%.*]] = load double, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 1
+; CHECK-NEXT:    [[A1:%.*]] = load double, ptr [[IDX1]], align 8
 ; CHECK-NEXT:    [[SQRT1:%.*]] = tail call nnan double @sqrt(double [[A0]]) #[[ATTR3:[0-9]+]]
 ; CHECK-NEXT:    [[SQRT2:%.*]] = tail call nnan double @sqrt(double [[A1]]) #[[ATTR3]]
-; CHECK-NEXT:    store double [[SQRT1]], double* [[B:%.*]], align 8
-; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
-; CHECK-NEXT:    store double [[SQRT2]], double* [[IDX2]], align 8
+; CHECK-NEXT:    store double [[SQRT1]], ptr [[B:%.*]], align 8
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, ptr [[B]], i64 1
+; CHECK-NEXT:    store double [[SQRT2]], ptr [[IDX2]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load double, double* %a, align 8
-  %idx1 = getelementptr inbounds double, double* %a, i64 1
-  %a1 = load double, double* %idx1, align 8
+  %a0 = load double, ptr %a, align 8
+  %idx1 = getelementptr inbounds double, ptr %a, i64 1
+  %a1 = load double, ptr %idx1, align 8
   %sqrt1 = tail call nnan double @sqrt(double %a0) nounwind
   %sqrt2 = tail call nnan double @sqrt(double %a1) nounwind
-  store double %sqrt1, double* %b, align 8
-  %idx2 = getelementptr inbounds double, double* %b, i64 1
-  store double %sqrt2, double* %idx2, align 8
+  store double %sqrt1, ptr %b, align 8
+  %idx2 = getelementptr inbounds double, ptr %b, i64 1
+  store double %sqrt2, ptr %idx2, align 8
   ret void
 }
 
 ; Negative test case
-define void @round_custom(i64* %a, i64* %b) {
+define void @round_custom(ptr %a, ptr %b) {
 ; CHECK-LABEL: @round_custom(
-; CHECK-NEXT:    [[A0:%.*]] = load i64, i64* [[A:%.*]], align 8
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 1
-; CHECK-NEXT:    [[A1:%.*]] = load i64, i64* [[IDX1]], align 8
+; CHECK-NEXT:    [[A0:%.*]] = load i64, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 1
+; CHECK-NEXT:    [[A1:%.*]] = load i64, ptr [[IDX1]], align 8
 ; CHECK-NEXT:    [[ROUND1:%.*]] = tail call i64 @round(i64 [[A0]]) #[[ATTR4:[0-9]+]]
 ; CHECK-NEXT:    [[ROUND2:%.*]] = tail call i64 @round(i64 [[A1]]) #[[ATTR4]]
-; CHECK-NEXT:    store i64 [[ROUND1]], i64* [[B:%.*]], align 8
-; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 1
-; CHECK-NEXT:    store i64 [[ROUND2]], i64* [[IDX2]], align 8
+; CHECK-NEXT:    store i64 [[ROUND1]], ptr [[B:%.*]], align 8
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 1
+; CHECK-NEXT:    store i64 [[ROUND2]], ptr [[IDX2]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load i64, i64* %a, align 8
-  %idx1 = getelementptr inbounds i64, i64* %a, i64 1
-  %a1 = load i64, i64* %idx1, align 8
+  %a0 = load i64, ptr %a, align 8
+  %idx1 = getelementptr inbounds i64, ptr %a, i64 1
+  %a1 = load i64, ptr %idx1, align 8
   %round1 = tail call i64 @round(i64 %a0) nounwind readnone
   %round2 = tail call i64 @round(i64 %a1) nounwind readnone
-  store i64 %round1, i64* %b, align 8
-  %idx2 = getelementptr inbounds i64, i64* %b, i64 1
-  store i64 %round2, i64* %idx2, align 8
+  store i64 %round1, ptr %b, align 8
+  %idx2 = getelementptr inbounds i64, ptr %b, i64 1
+  store i64 %round2, ptr %idx2, align 8
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cast.ll b/llvm/test/Transforms/SLPVectorizer/X86/cast.ll
index aab16b74bc2bd..2736f18215117 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cast.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cast.ll
@@ -11,114 +11,104 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ;     A[3] = B[3];
 ; }
 
-define i32 @test_sext_4i8_to_4i32(i32* noalias nocapture %A, i8* noalias nocapture %B) {
+define i32 @test_sext_4i8_to_4i32(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-LABEL: @test_sext_4i8_to_4i32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[B:%.*]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[B:%.*]], align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i8> [[TMP1]] to <4 x i32>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    ret i32 undef
 ;
 entry:
-  %0 = load i8, i8* %B, align 1
+  %0 = load i8, ptr %B, align 1
   %conv = sext i8 %0 to i32
-  store i32 %conv, i32* %A, align 4
-  %arrayidx2 = getelementptr inbounds i8, i8* %B, i64 1
-  %1 = load i8, i8* %arrayidx2, align 1
+  store i32 %conv, ptr %A, align 4
+  %arrayidx2 = getelementptr inbounds i8, ptr %B, i64 1
+  %1 = load i8, ptr %arrayidx2, align 1
   %conv3 = sext i8 %1 to i32
-  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 1
-  store i32 %conv3, i32* %arrayidx4, align 4
-  %arrayidx5 = getelementptr inbounds i8, i8* %B, i64 2
-  %2 = load i8, i8* %arrayidx5, align 1
+  %arrayidx4 = getelementptr inbounds i32, ptr %A, i64 1
+  store i32 %conv3, ptr %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds i8, ptr %B, i64 2
+  %2 = load i8, ptr %arrayidx5, align 1
   %conv6 = sext i8 %2 to i32
-  %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 2
-  store i32 %conv6, i32* %arrayidx7, align 4
-  %arrayidx8 = getelementptr inbounds i8, i8* %B, i64 3
-  %3 = load i8, i8* %arrayidx8, align 1
+  %arrayidx7 = getelementptr inbounds i32, ptr %A, i64 2
+  store i32 %conv6, ptr %arrayidx7, align 4
+  %arrayidx8 = getelementptr inbounds i8, ptr %B, i64 3
+  %3 = load i8, ptr %arrayidx8, align 1
   %conv9 = sext i8 %3 to i32
-  %arrayidx10 = getelementptr inbounds i32, i32* %A, i64 3
-  store i32 %conv9, i32* %arrayidx10, align 4
+  %arrayidx10 = getelementptr inbounds i32, ptr %A, i64 3
+  store i32 %conv9, ptr %arrayidx10, align 4
   ret i32 undef
 }
 
-define i32 @test_zext_4i16_to_4i32(i32* noalias nocapture %A, i16* noalias nocapture %B) {
+define i32 @test_zext_4i16_to_4i32(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-LABEL: @test_zext_4i16_to_4i32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[B:%.*]] to <4 x i16>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[B:%.*]], align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    ret i32 undef
 ;
 entry:
-  %0 = load i16, i16* %B, align 1
+  %0 = load i16, ptr %B, align 1
   %conv = zext i16 %0 to i32
-  store i32 %conv, i32* %A, align 4
-  %arrayidx2 = getelementptr inbounds i16, i16* %B, i64 1
-  %1 = load i16, i16* %arrayidx2, align 1
+  store i32 %conv, ptr %A, align 4
+  %arrayidx2 = getelementptr inbounds i16, ptr %B, i64 1
+  %1 = load i16, ptr %arrayidx2, align 1
   %conv3 = zext i16 %1 to i32
-  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 1
-  store i32 %conv3, i32* %arrayidx4, align 4
-  %arrayidx5 = getelementptr inbounds i16, i16* %B, i64 2
-  %2 = load i16, i16* %arrayidx5, align 1
+  %arrayidx4 = getelementptr inbounds i32, ptr %A, i64 1
+  store i32 %conv3, ptr %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds i16, ptr %B, i64 2
+  %2 = load i16, ptr %arrayidx5, align 1
   %conv6 = zext i16 %2 to i32
-  %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 2
-  store i32 %conv6, i32* %arrayidx7, align 4
-  %arrayidx8 = getelementptr inbounds i16, i16* %B, i64 3
-  %3 = load i16, i16* %arrayidx8, align 1
+  %arrayidx7 = getelementptr inbounds i32, ptr %A, i64 2
+  store i32 %conv6, ptr %arrayidx7, align 4
+  %arrayidx8 = getelementptr inbounds i16, ptr %B, i64 3
+  %3 = load i16, ptr %arrayidx8, align 1
   %conv9 = zext i16 %3 to i32
-  %arrayidx10 = getelementptr inbounds i32, i32* %A, i64 3
-  store i32 %conv9, i32* %arrayidx10, align 4
+  %arrayidx10 = getelementptr inbounds i32, ptr %A, i64 3
+  store i32 %conv9, ptr %arrayidx10, align 4
   ret i32 undef
 }
 
-define i64 @test_sext_4i16_to_4i64(i64* noalias nocapture %A, i16* noalias nocapture %B) {
+define i64 @test_sext_4i16_to_4i64(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; SSE42-LABEL: @test_sext_4i16_to_4i64(
 ; SSE42-NEXT:  entry:
-; SSE42-NEXT:    [[TMP0:%.*]] = bitcast i16* [[B:%.*]] to <2 x i16>*
-; SSE42-NEXT:    [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* [[TMP0]], align 1
+; SSE42-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr [[B:%.*]], align 1
 ; SSE42-NEXT:    [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i64>
-; SSE42-NEXT:    [[TMP3:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
-; SSE42-NEXT:    store <2 x i64> [[TMP2]], <2 x i64>* [[TMP3]], align 4
-; SSE42-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 2
-; SSE42-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 2
-; SSE42-NEXT:    [[TMP4:%.*]] = bitcast i16* [[ARRAYIDX5]] to <2 x i16>*
-; SSE42-NEXT:    [[TMP5:%.*]] = load <2 x i16>, <2 x i16>* [[TMP4]], align 1
+; SSE42-NEXT:    store <2 x i64> [[TMP2]], ptr [[A:%.*]], align 4
+; SSE42-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 2
+; SSE42-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 2
+; SSE42-NEXT:    [[TMP5:%.*]] = load <2 x i16>, ptr [[ARRAYIDX5]], align 1
 ; SSE42-NEXT:    [[TMP6:%.*]] = sext <2 x i16> [[TMP5]] to <2 x i64>
-; SSE42-NEXT:    [[TMP7:%.*]] = bitcast i64* [[ARRAYIDX7]] to <2 x i64>*
-; SSE42-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
+; SSE42-NEXT:    store <2 x i64> [[TMP6]], ptr [[ARRAYIDX7]], align 4
 ; SSE42-NEXT:    ret i64 undef
 ;
 ; AVX-LABEL: @test_sext_4i16_to_4i64(
 ; AVX-NEXT:  entry:
-; AVX-NEXT:    [[TMP0:%.*]] = bitcast i16* [[B:%.*]] to <4 x i16>*
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 1
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[B:%.*]], align 1
 ; AVX-NEXT:    [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i64>
-; AVX-NEXT:    [[TMP3:%.*]] = bitcast i64* [[A:%.*]] to <4 x i64>*
-; AVX-NEXT:    store <4 x i64> [[TMP2]], <4 x i64>* [[TMP3]], align 4
+; AVX-NEXT:    store <4 x i64> [[TMP2]], ptr [[A:%.*]], align 4
 ; AVX-NEXT:    ret i64 undef
 ;
 entry:
-  %0 = load i16, i16* %B, align 1
+  %0 = load i16, ptr %B, align 1
   %conv = sext i16 %0 to i64
-  store i64 %conv, i64* %A, align 4
-  %arrayidx2 = getelementptr inbounds i16, i16* %B, i64 1
-  %1 = load i16, i16* %arrayidx2, align 1
+  store i64 %conv, ptr %A, align 4
+  %arrayidx2 = getelementptr inbounds i16, ptr %B, i64 1
+  %1 = load i16, ptr %arrayidx2, align 1
   %conv3 = sext i16 %1 to i64
-  %arrayidx4 = getelementptr inbounds i64, i64* %A, i64 1
-  store i64 %conv3, i64* %arrayidx4, align 4
-  %arrayidx5 = getelementptr inbounds i16, i16* %B, i64 2
-  %2 = load i16, i16* %arrayidx5, align 1
+  %arrayidx4 = getelementptr inbounds i64, ptr %A, i64 1
+  store i64 %conv3, ptr %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds i16, ptr %B, i64 2
+  %2 = load i16, ptr %arrayidx5, align 1
   %conv6 = sext i16 %2 to i64
-  %arrayidx7 = getelementptr inbounds i64, i64* %A, i64 2
-  store i64 %conv6, i64* %arrayidx7, align 4
-  %arrayidx8 = getelementptr inbounds i16, i16* %B, i64 3
-  %3 = load i16, i16* %arrayidx8, align 1
+  %arrayidx7 = getelementptr inbounds i64, ptr %A, i64 2
+  store i64 %conv6, ptr %arrayidx7, align 4
+  %arrayidx8 = getelementptr inbounds i16, ptr %B, i64 3
+  %3 = load i16, ptr %arrayidx8, align 1
   %conv9 = sext i16 %3 to i64
-  %arrayidx10 = getelementptr inbounds i64, i64* %A, i64 3
-  store i64 %conv9, i64* %arrayidx10, align 4
+  %arrayidx10 = getelementptr inbounds i64, ptr %A, i64 3
+  store i64 %conv9, ptr %arrayidx10, align 4
   ret i64 undef
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/catchswitch.ll b/llvm/test/Transforms/SLPVectorizer/X86/catchswitch.ll
index 996524e220eb0..f8a7a9503ca52 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/catchswitch.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/catchswitch.ll
@@ -6,13 +6,13 @@
 ; is invalid.  Only phis and the catchswitch may be present, so we must
 ; avoid trying to insert shuffles into such a block.
 
-%typeA = type { i8**, i8*, [20 x i8] }
+%typeA = type { ptr, ptr, [20 x i8] }
 @globalA = external global %typeA
 
 declare i32 @__CxxFrameHandler3(...)
 declare void @funcA()
 
-define void @important_func() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+define void @important_func() personality ptr @__CxxFrameHandler3 {
 ; CHECK-LABEL: @important_func(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LABELB:%.*]]
@@ -45,7 +45,7 @@ define void @important_func() personality i8* bitcast (i32 (...)* @__CxxFrameHan
 ; CHECK-NEXT:    [[TMP4:%.*]] = phi float [ [[H]], [[LABELG]] ], [ [[F]], [[LABELE]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = catchswitch within none [label %catch] unwind to caller
 ; CHECK:       catch:
-; CHECK-NEXT:    [[TMP6:%.*]] = catchpad within [[TMP5]] [%typeA* @globalA, i32 8, i8* null]
+; CHECK-NEXT:    [[TMP6:%.*]] = catchpad within [[TMP5]] [ptr @globalA, i32 8, ptr null]
 ; CHECK-NEXT:    unreachable
 ;
 entry:
@@ -88,6 +88,6 @@ catch.dispatch:
   %5 = catchswitch within none [label %catch] unwind to caller
 
 catch:
-  %6 = catchpad within %5 [%typeA* @globalA, i32 8, i8* null]
+  %6 = catchpad within %5 [ptr @globalA, i32 8, ptr null]
   unreachable
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
index b0e6fe10c6f96..8b265ed24bebb 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
@@ -1,33 +1,32 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes=slp-vectorizer -mcpu=skx -S -o - -mtriple=x86_64-unknown < %s | FileCheck %s
 
-define void @test(double* %0, double %1) {
+define void @test(ptr %0, double %1) {
 ; CHECK-LABEL: @test(
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds double, double* [[TMP0:%.*]], i32 6
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds double, ptr [[TMP0:%.*]], i32 6
 ; CHECK-NEXT:    br label [[TMP4:%.*]]
 ; CHECK:       4:
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[TMP1:%.*]], i32 1
 ; CHECK-NEXT:    br label [[TMP6:%.*]]
 ; CHECK:       6:
-; CHECK-NEXT:    [[TMP7:%.*]] = load double, double* null, align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = load double, ptr null, align 8
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[TMP7]], i32 1
 ; CHECK-NEXT:    [[TMP9:%.*]] = fcmp olt <2 x double> [[TMP5]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = select <2 x i1> [[TMP9]], <2 x double> zeroinitializer, <2 x double> zeroinitializer
 ; CHECK-NEXT:    [[TMP11:%.*]] = fmul <2 x double> [[TMP10]], zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = fadd <2 x double> zeroinitializer, [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = bitcast double* [[TMP3]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP12]], <2 x double>* [[TMP13]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP12]], ptr [[TMP3]], align 8
 ; CHECK-NEXT:    br label [[TMP6]]
 ;
-  %3 = getelementptr inbounds double, double* %0, i32 6
-  %4 = getelementptr inbounds double, double* %0, i32 7
+  %3 = getelementptr inbounds double, ptr %0, i32 6
+  %4 = getelementptr inbounds double, ptr %0, i32 7
   br label %5
 
 5:                                                ; preds = %2
   br label %6
 
 6:                                                ; preds = %6, %5
-  %7 = load double, double* null, align 8
+  %7 = load double, ptr null, align 8
   %8 = fcmp olt double 0.000000e+00, 0.000000e+00
   %9 = select i1 %8, double 0.000000e+00, double 0.000000e+00
   %10 = fcmp ogt double %7, %1
@@ -35,9 +34,9 @@ define void @test(double* %0, double %1) {
   %12 = fmul double %9, 0.000000e+00
   %13 = fmul double 0.000000e+00, %11
   %14 = fadd double 0.000000e+00, %12
-  store double %14, double* %3, align 8
+  store double %14, ptr %3, align 8
   %15 = fadd double 0.000000e+00, %13
-  store double %15, double* %4, align 8
+  store double %15, ptr %4, align 8
   br label %6
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp_commute-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp_commute-inseltpoison.ll
index fcd63674093eb..997b8ac8add32 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cmp_commute-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp_commute-inseltpoison.ll
@@ -6,10 +6,9 @@
 ; Check that we can commute operands based on the predicate.
 ;
 
-define <4 x i32> @icmp_eq_v4i32(<4 x i32> %a, i32* %b) {
+define <4 x i32> @icmp_eq_v4i32(<4 x i32> %a, ptr %b) {
 ; CHECK-LABEL: @icmp_eq_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i32> [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
@@ -18,14 +17,13 @@ define <4 x i32> @icmp_eq_v4i32(<4 x i32> %a, i32* %b) {
   %a1 = extractelement <4 x i32> %a, i32 1
   %a2 = extractelement <4 x i32> %a, i32 2
   %a3 = extractelement <4 x i32> %a, i32 3
-  %p0 = getelementptr inbounds i32, i32* %b, i32 0
-  %p1 = getelementptr inbounds i32, i32* %b, i32 1
-  %p2 = getelementptr inbounds i32, i32* %b, i32 2
-  %p3 = getelementptr inbounds i32, i32* %b, i32 3
-  %b0 = load i32, i32* %p0, align 4
-  %b1 = load i32, i32* %p1, align 4
-  %b2 = load i32, i32* %p2, align 4
-  %b3 = load i32, i32* %p3, align 4
+  %p1 = getelementptr inbounds i32, ptr %b, i32 1
+  %p2 = getelementptr inbounds i32, ptr %b, i32 2
+  %p3 = getelementptr inbounds i32, ptr %b, i32 3
+  %b0 = load i32, ptr %b, align 4
+  %b1 = load i32, ptr %p1, align 4
+  %b2 = load i32, ptr %p2, align 4
+  %b3 = load i32, ptr %p3, align 4
   %c0 = icmp eq i32 %a0, %b0
   %c1 = icmp eq i32 %b1, %a1
   %c2 = icmp eq i32 %b2, %a2
@@ -38,10 +36,9 @@ define <4 x i32> @icmp_eq_v4i32(<4 x i32> %a, i32* %b) {
   ret <4 x i32> %r
 }
 
-define <4 x i32> @icmp_ne_v4i32(<4 x i32> %a, i32* %b) {
+define <4 x i32> @icmp_ne_v4i32(<4 x i32> %a, ptr %b) {
 ; CHECK-LABEL: @icmp_ne_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
@@ -50,14 +47,13 @@ define <4 x i32> @icmp_ne_v4i32(<4 x i32> %a, i32* %b) {
   %a1 = extractelement <4 x i32> %a, i32 1
   %a2 = extractelement <4 x i32> %a, i32 2
   %a3 = extractelement <4 x i32> %a, i32 3
-  %p0 = getelementptr inbounds i32, i32* %b, i32 0
-  %p1 = getelementptr inbounds i32, i32* %b, i32 1
-  %p2 = getelementptr inbounds i32, i32* %b, i32 2
-  %p3 = getelementptr inbounds i32, i32* %b, i32 3
-  %b0 = load i32, i32* %p0, align 4
-  %b1 = load i32, i32* %p1, align 4
-  %b2 = load i32, i32* %p2, align 4
-  %b3 = load i32, i32* %p3, align 4
+  %p1 = getelementptr inbounds i32, ptr %b, i32 1
+  %p2 = getelementptr inbounds i32, ptr %b, i32 2
+  %p3 = getelementptr inbounds i32, ptr %b, i32 3
+  %b0 = load i32, ptr %b, align 4
+  %b1 = load i32, ptr %p1, align 4
+  %b2 = load i32, ptr %p2, align 4
+  %b3 = load i32, ptr %p3, align 4
   %c0 = icmp ne i32 %a0, %b0
   %c1 = icmp ne i32 %b1, %a1
   %c2 = icmp ne i32 %b2, %a2
@@ -70,10 +66,9 @@ define <4 x i32> @icmp_ne_v4i32(<4 x i32> %a, i32* %b) {
   ret <4 x i32> %r
 }
 
-define <4 x i32> @fcmp_oeq_v4i32(<4 x float> %a, float* %b) {
+define <4 x i32> @fcmp_oeq_v4i32(<4 x float> %a, ptr %b) {
 ; CHECK-LABEL: @fcmp_oeq_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp oeq <4 x float> [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
@@ -82,14 +77,13 @@ define <4 x i32> @fcmp_oeq_v4i32(<4 x float> %a, float* %b) {
   %a1 = extractelement <4 x float> %a, i32 1
   %a2 = extractelement <4 x float> %a, i32 2
   %a3 = extractelement <4 x float> %a, i32 3
-  %p0 = getelementptr inbounds float, float* %b, i32 0
-  %p1 = getelementptr inbounds float, float* %b, i32 1
-  %p2 = getelementptr inbounds float, float* %b, i32 2
-  %p3 = getelementptr inbounds float, float* %b, i32 3
-  %b0 = load float, float* %p0, align 4
-  %b1 = load float, float* %p1, align 4
-  %b2 = load float, float* %p2, align 4
-  %b3 = load float, float* %p3, align 4
+  %p1 = getelementptr inbounds float, ptr %b, i32 1
+  %p2 = getelementptr inbounds float, ptr %b, i32 2
+  %p3 = getelementptr inbounds float, ptr %b, i32 3
+  %b0 = load float, ptr %b, align 4
+  %b1 = load float, ptr %p1, align 4
+  %b2 = load float, ptr %p2, align 4
+  %b3 = load float, ptr %p3, align 4
   %c0 = fcmp oeq float %a0, %b0
   %c1 = fcmp oeq float %b1, %a1
   %c2 = fcmp oeq float %b2, %a2
@@ -102,10 +96,9 @@ define <4 x i32> @fcmp_oeq_v4i32(<4 x float> %a, float* %b) {
   ret <4 x i32> %r
 }
 
-define <4 x i32> @fcmp_uno_v4i32(<4 x float> %a, float* %b) {
+define <4 x i32> @fcmp_uno_v4i32(<4 x float> %a, ptr %b) {
 ; CHECK-LABEL: @fcmp_uno_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp uno <4 x float> [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
@@ -114,14 +107,13 @@ define <4 x i32> @fcmp_uno_v4i32(<4 x float> %a, float* %b) {
   %a1 = extractelement <4 x float> %a, i32 1
   %a2 = extractelement <4 x float> %a, i32 2
   %a3 = extractelement <4 x float> %a, i32 3
-  %p0 = getelementptr inbounds float, float* %b, i32 0
-  %p1 = getelementptr inbounds float, float* %b, i32 1
-  %p2 = getelementptr inbounds float, float* %b, i32 2
-  %p3 = getelementptr inbounds float, float* %b, i32 3
-  %b0 = load float, float* %p0, align 4
-  %b1 = load float, float* %p1, align 4
-  %b2 = load float, float* %p2, align 4
-  %b3 = load float, float* %p3, align 4
+  %p1 = getelementptr inbounds float, ptr %b, i32 1
+  %p2 = getelementptr inbounds float, ptr %b, i32 2
+  %p3 = getelementptr inbounds float, ptr %b, i32 3
+  %b0 = load float, ptr %b, align 4
+  %b1 = load float, ptr %p1, align 4
+  %b2 = load float, ptr %p2, align 4
+  %b3 = load float, ptr %p3, align 4
   %c0 = fcmp uno float %a0, %b0
   %c1 = fcmp uno float %b1, %a1
   %c2 = fcmp uno float %b2, %a2
@@ -138,10 +130,9 @@ define <4 x i32> @fcmp_uno_v4i32(<4 x float> %a, float* %b) {
 ; Check that we can commute operands by swapping the predicate.
 ;
 
-define <4 x i32> @icmp_sgt_slt_v4i32(<4 x i32> %a, i32* %b) {
+define <4 x i32> @icmp_sgt_slt_v4i32(<4 x i32> %a, ptr %b) {
 ; CHECK-LABEL: @icmp_sgt_slt_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <4 x i32> [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
@@ -150,14 +141,13 @@ define <4 x i32> @icmp_sgt_slt_v4i32(<4 x i32> %a, i32* %b) {
   %a1 = extractelement <4 x i32> %a, i32 1
   %a2 = extractelement <4 x i32> %a, i32 2
   %a3 = extractelement <4 x i32> %a, i32 3
-  %p0 = getelementptr inbounds i32, i32* %b, i32 0
-  %p1 = getelementptr inbounds i32, i32* %b, i32 1
-  %p2 = getelementptr inbounds i32, i32* %b, i32 2
-  %p3 = getelementptr inbounds i32, i32* %b, i32 3
-  %b0 = load i32, i32* %p0, align 4
-  %b1 = load i32, i32* %p1, align 4
-  %b2 = load i32, i32* %p2, align 4
-  %b3 = load i32, i32* %p3, align 4
+  %p1 = getelementptr inbounds i32, ptr %b, i32 1
+  %p2 = getelementptr inbounds i32, ptr %b, i32 2
+  %p3 = getelementptr inbounds i32, ptr %b, i32 3
+  %b0 = load i32, ptr %b, align 4
+  %b1 = load i32, ptr %p1, align 4
+  %b2 = load i32, ptr %p2, align 4
+  %b3 = load i32, ptr %p3, align 4
   %c0 = icmp sgt i32 %a0, %b0
   %c1 = icmp slt i32 %b1, %a1
   %c2 = icmp slt i32 %b2, %a2
@@ -170,10 +160,9 @@ define <4 x i32> @icmp_sgt_slt_v4i32(<4 x i32> %a, i32* %b) {
   ret <4 x i32> %r
 }
 
-define <4 x i32> @icmp_uge_ule_v4i32(<4 x i32> %a, i32* %b) {
+define <4 x i32> @icmp_uge_ule_v4i32(<4 x i32> %a, ptr %b) {
 ; CHECK-LABEL: @icmp_uge_ule_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ule <4 x i32> [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
@@ -182,14 +171,13 @@ define <4 x i32> @icmp_uge_ule_v4i32(<4 x i32> %a, i32* %b) {
   %a1 = extractelement <4 x i32> %a, i32 1
   %a2 = extractelement <4 x i32> %a, i32 2
   %a3 = extractelement <4 x i32> %a, i32 3
-  %p0 = getelementptr inbounds i32, i32* %b, i32 0
-  %p1 = getelementptr inbounds i32, i32* %b, i32 1
-  %p2 = getelementptr inbounds i32, i32* %b, i32 2
-  %p3 = getelementptr inbounds i32, i32* %b, i32 3
-  %b0 = load i32, i32* %p0, align 4
-  %b1 = load i32, i32* %p1, align 4
-  %b2 = load i32, i32* %p2, align 4
-  %b3 = load i32, i32* %p3, align 4
+  %p1 = getelementptr inbounds i32, ptr %b, i32 1
+  %p2 = getelementptr inbounds i32, ptr %b, i32 2
+  %p3 = getelementptr inbounds i32, ptr %b, i32 3
+  %b0 = load i32, ptr %b, align 4
+  %b1 = load i32, ptr %p1, align 4
+  %b2 = load i32, ptr %p2, align 4
+  %b3 = load i32, ptr %p3, align 4
   %c0 = icmp uge i32 %a0, %b0
   %c1 = icmp ule i32 %b1, %a1
   %c2 = icmp ule i32 %b2, %a2
@@ -202,10 +190,9 @@ define <4 x i32> @icmp_uge_ule_v4i32(<4 x i32> %a, i32* %b) {
   ret <4 x i32> %r
 }
 
-define <4 x i32> @fcmp_ogt_olt_v4i32(<4 x float> %a, float* %b) {
+define <4 x i32> @fcmp_ogt_olt_v4i32(<4 x float> %a, ptr %b) {
 ; CHECK-LABEL: @fcmp_ogt_olt_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp olt <4 x float> [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
@@ -214,14 +201,13 @@ define <4 x i32> @fcmp_ogt_olt_v4i32(<4 x float> %a, float* %b) {
   %a1 = extractelement <4 x float> %a, i32 1
   %a2 = extractelement <4 x float> %a, i32 2
   %a3 = extractelement <4 x float> %a, i32 3
-  %p0 = getelementptr inbounds float, float* %b, i32 0
-  %p1 = getelementptr inbounds float, float* %b, i32 1
-  %p2 = getelementptr inbounds float, float* %b, i32 2
-  %p3 = getelementptr inbounds float, float* %b, i32 3
-  %b0 = load float, float* %p0, align 4
-  %b1 = load float, float* %p1, align 4
-  %b2 = load float, float* %p2, align 4
-  %b3 = load float, float* %p3, align 4
+  %p1 = getelementptr inbounds float, ptr %b, i32 1
+  %p2 = getelementptr inbounds float, ptr %b, i32 2
+  %p3 = getelementptr inbounds float, ptr %b, i32 3
+  %b0 = load float, ptr %b, align 4
+  %b1 = load float, ptr %p1, align 4
+  %b2 = load float, ptr %p2, align 4
+  %b3 = load float, ptr %p3, align 4
   %c0 = fcmp ogt float %a0, %b0
   %c1 = fcmp olt float %b1, %a1
   %c2 = fcmp olt float %b2, %a2
@@ -234,10 +220,9 @@ define <4 x i32> @fcmp_ogt_olt_v4i32(<4 x float> %a, float* %b) {
   ret <4 x i32> %r
 }
 
-define <4 x i32> @fcmp_ord_uno_v4i32(<4 x float> %a, float* %b) {
+define <4 x i32> @fcmp_ord_uno_v4i32(<4 x float> %a, ptr %b) {
 ; CHECK-LABEL: @fcmp_ord_uno_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ord <4 x float> [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fcmp uno <4 x float> [[TMP2]], [[A]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
@@ -248,14 +233,13 @@ define <4 x i32> @fcmp_ord_uno_v4i32(<4 x float> %a, float* %b) {
   %a1 = extractelement <4 x float> %a, i32 1
   %a2 = extractelement <4 x float> %a, i32 2
   %a3 = extractelement <4 x float> %a, i32 3
-  %p0 = getelementptr inbounds float, float* %b, i32 0
-  %p1 = getelementptr inbounds float, float* %b, i32 1
-  %p2 = getelementptr inbounds float, float* %b, i32 2
-  %p3 = getelementptr inbounds float, float* %b, i32 3
-  %b0 = load float, float* %p0, align 4
-  %b1 = load float, float* %p1, align 4
-  %b2 = load float, float* %p2, align 4
-  %b3 = load float, float* %p3, align 4
+  %p1 = getelementptr inbounds float, ptr %b, i32 1
+  %p2 = getelementptr inbounds float, ptr %b, i32 2
+  %p3 = getelementptr inbounds float, ptr %b, i32 3
+  %b0 = load float, ptr %b, align 4
+  %b1 = load float, ptr %p1, align 4
+  %b2 = load float, ptr %p2, align 4
+  %b3 = load float, ptr %p3, align 4
   %c0 = fcmp ord float %a0, %b0
   %c1 = fcmp uno float %b1, %a1
   %c2 = fcmp uno float %b2, %a2

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp_commute.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp_commute.ll
index c0c433c74f4f4..29cf66a1ea656 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cmp_commute.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp_commute.ll
@@ -6,10 +6,9 @@
 ; Check that we can commute operands based on the predicate.
 ;
 
-define <4 x i32> @icmp_eq_v4i32(<4 x i32> %a, i32* %b) {
+define <4 x i32> @icmp_eq_v4i32(<4 x i32> %a, ptr %b) {
 ; CHECK-LABEL: @icmp_eq_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i32> [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
@@ -18,14 +17,13 @@ define <4 x i32> @icmp_eq_v4i32(<4 x i32> %a, i32* %b) {
   %a1 = extractelement <4 x i32> %a, i32 1
   %a2 = extractelement <4 x i32> %a, i32 2
   %a3 = extractelement <4 x i32> %a, i32 3
-  %p0 = getelementptr inbounds i32, i32* %b, i32 0
-  %p1 = getelementptr inbounds i32, i32* %b, i32 1
-  %p2 = getelementptr inbounds i32, i32* %b, i32 2
-  %p3 = getelementptr inbounds i32, i32* %b, i32 3
-  %b0 = load i32, i32* %p0, align 4
-  %b1 = load i32, i32* %p1, align 4
-  %b2 = load i32, i32* %p2, align 4
-  %b3 = load i32, i32* %p3, align 4
+  %p1 = getelementptr inbounds i32, ptr %b, i32 1
+  %p2 = getelementptr inbounds i32, ptr %b, i32 2
+  %p3 = getelementptr inbounds i32, ptr %b, i32 3
+  %b0 = load i32, ptr %b, align 4
+  %b1 = load i32, ptr %p1, align 4
+  %b2 = load i32, ptr %p2, align 4
+  %b3 = load i32, ptr %p3, align 4
   %c0 = icmp eq i32 %a0, %b0
   %c1 = icmp eq i32 %b1, %a1
   %c2 = icmp eq i32 %b2, %a2
@@ -38,10 +36,9 @@ define <4 x i32> @icmp_eq_v4i32(<4 x i32> %a, i32* %b) {
   ret <4 x i32> %r
 }
 
-define <4 x i32> @icmp_ne_v4i32(<4 x i32> %a, i32* %b) {
+define <4 x i32> @icmp_ne_v4i32(<4 x i32> %a, ptr %b) {
 ; CHECK-LABEL: @icmp_ne_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
@@ -50,14 +47,13 @@ define <4 x i32> @icmp_ne_v4i32(<4 x i32> %a, i32* %b) {
   %a1 = extractelement <4 x i32> %a, i32 1
   %a2 = extractelement <4 x i32> %a, i32 2
   %a3 = extractelement <4 x i32> %a, i32 3
-  %p0 = getelementptr inbounds i32, i32* %b, i32 0
-  %p1 = getelementptr inbounds i32, i32* %b, i32 1
-  %p2 = getelementptr inbounds i32, i32* %b, i32 2
-  %p3 = getelementptr inbounds i32, i32* %b, i32 3
-  %b0 = load i32, i32* %p0, align 4
-  %b1 = load i32, i32* %p1, align 4
-  %b2 = load i32, i32* %p2, align 4
-  %b3 = load i32, i32* %p3, align 4
+  %p1 = getelementptr inbounds i32, ptr %b, i32 1
+  %p2 = getelementptr inbounds i32, ptr %b, i32 2
+  %p3 = getelementptr inbounds i32, ptr %b, i32 3
+  %b0 = load i32, ptr %b, align 4
+  %b1 = load i32, ptr %p1, align 4
+  %b2 = load i32, ptr %p2, align 4
+  %b3 = load i32, ptr %p3, align 4
   %c0 = icmp ne i32 %a0, %b0
   %c1 = icmp ne i32 %b1, %a1
   %c2 = icmp ne i32 %b2, %a2
@@ -70,10 +66,9 @@ define <4 x i32> @icmp_ne_v4i32(<4 x i32> %a, i32* %b) {
   ret <4 x i32> %r
 }
 
-define <4 x i32> @fcmp_oeq_v4i32(<4 x float> %a, float* %b) {
+define <4 x i32> @fcmp_oeq_v4i32(<4 x float> %a, ptr %b) {
 ; CHECK-LABEL: @fcmp_oeq_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp oeq <4 x float> [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
@@ -82,14 +77,13 @@ define <4 x i32> @fcmp_oeq_v4i32(<4 x float> %a, float* %b) {
   %a1 = extractelement <4 x float> %a, i32 1
   %a2 = extractelement <4 x float> %a, i32 2
   %a3 = extractelement <4 x float> %a, i32 3
-  %p0 = getelementptr inbounds float, float* %b, i32 0
-  %p1 = getelementptr inbounds float, float* %b, i32 1
-  %p2 = getelementptr inbounds float, float* %b, i32 2
-  %p3 = getelementptr inbounds float, float* %b, i32 3
-  %b0 = load float, float* %p0, align 4
-  %b1 = load float, float* %p1, align 4
-  %b2 = load float, float* %p2, align 4
-  %b3 = load float, float* %p3, align 4
+  %p1 = getelementptr inbounds float, ptr %b, i32 1
+  %p2 = getelementptr inbounds float, ptr %b, i32 2
+  %p3 = getelementptr inbounds float, ptr %b, i32 3
+  %b0 = load float, ptr %b, align 4
+  %b1 = load float, ptr %p1, align 4
+  %b2 = load float, ptr %p2, align 4
+  %b3 = load float, ptr %p3, align 4
   %c0 = fcmp oeq float %a0, %b0
   %c1 = fcmp oeq float %b1, %a1
   %c2 = fcmp oeq float %b2, %a2
@@ -102,10 +96,9 @@ define <4 x i32> @fcmp_oeq_v4i32(<4 x float> %a, float* %b) {
   ret <4 x i32> %r
 }
 
-define <4 x i32> @fcmp_uno_v4i32(<4 x float> %a, float* %b) {
+define <4 x i32> @fcmp_uno_v4i32(<4 x float> %a, ptr %b) {
 ; CHECK-LABEL: @fcmp_uno_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp uno <4 x float> [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
@@ -114,14 +107,13 @@ define <4 x i32> @fcmp_uno_v4i32(<4 x float> %a, float* %b) {
   %a1 = extractelement <4 x float> %a, i32 1
   %a2 = extractelement <4 x float> %a, i32 2
   %a3 = extractelement <4 x float> %a, i32 3
-  %p0 = getelementptr inbounds float, float* %b, i32 0
-  %p1 = getelementptr inbounds float, float* %b, i32 1
-  %p2 = getelementptr inbounds float, float* %b, i32 2
-  %p3 = getelementptr inbounds float, float* %b, i32 3
-  %b0 = load float, float* %p0, align 4
-  %b1 = load float, float* %p1, align 4
-  %b2 = load float, float* %p2, align 4
-  %b3 = load float, float* %p3, align 4
+  %p1 = getelementptr inbounds float, ptr %b, i32 1
+  %p2 = getelementptr inbounds float, ptr %b, i32 2
+  %p3 = getelementptr inbounds float, ptr %b, i32 3
+  %b0 = load float, ptr %b, align 4
+  %b1 = load float, ptr %p1, align 4
+  %b2 = load float, ptr %p2, align 4
+  %b3 = load float, ptr %p3, align 4
   %c0 = fcmp uno float %a0, %b0
   %c1 = fcmp uno float %b1, %a1
   %c2 = fcmp uno float %b2, %a2
@@ -138,10 +130,9 @@ define <4 x i32> @fcmp_uno_v4i32(<4 x float> %a, float* %b) {
 ; Check that we can commute operands by swapping the predicate.
 ;
 
-define <4 x i32> @icmp_sgt_slt_v4i32(<4 x i32> %a, i32* %b) {
+define <4 x i32> @icmp_sgt_slt_v4i32(<4 x i32> %a, ptr %b) {
 ; CHECK-LABEL: @icmp_sgt_slt_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <4 x i32> [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
@@ -150,14 +141,13 @@ define <4 x i32> @icmp_sgt_slt_v4i32(<4 x i32> %a, i32* %b) {
   %a1 = extractelement <4 x i32> %a, i32 1
   %a2 = extractelement <4 x i32> %a, i32 2
   %a3 = extractelement <4 x i32> %a, i32 3
-  %p0 = getelementptr inbounds i32, i32* %b, i32 0
-  %p1 = getelementptr inbounds i32, i32* %b, i32 1
-  %p2 = getelementptr inbounds i32, i32* %b, i32 2
-  %p3 = getelementptr inbounds i32, i32* %b, i32 3
-  %b0 = load i32, i32* %p0, align 4
-  %b1 = load i32, i32* %p1, align 4
-  %b2 = load i32, i32* %p2, align 4
-  %b3 = load i32, i32* %p3, align 4
+  %p1 = getelementptr inbounds i32, ptr %b, i32 1
+  %p2 = getelementptr inbounds i32, ptr %b, i32 2
+  %p3 = getelementptr inbounds i32, ptr %b, i32 3
+  %b0 = load i32, ptr %b, align 4
+  %b1 = load i32, ptr %p1, align 4
+  %b2 = load i32, ptr %p2, align 4
+  %b3 = load i32, ptr %p3, align 4
   %c0 = icmp sgt i32 %a0, %b0
   %c1 = icmp slt i32 %b1, %a1
   %c2 = icmp slt i32 %b2, %a2
@@ -170,10 +160,9 @@ define <4 x i32> @icmp_sgt_slt_v4i32(<4 x i32> %a, i32* %b) {
   ret <4 x i32> %r
 }
 
-define <4 x i32> @icmp_uge_ule_v4i32(<4 x i32> %a, i32* %b) {
+define <4 x i32> @icmp_uge_ule_v4i32(<4 x i32> %a, ptr %b) {
 ; CHECK-LABEL: @icmp_uge_ule_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ule <4 x i32> [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
@@ -182,14 +171,13 @@ define <4 x i32> @icmp_uge_ule_v4i32(<4 x i32> %a, i32* %b) {
   %a1 = extractelement <4 x i32> %a, i32 1
   %a2 = extractelement <4 x i32> %a, i32 2
   %a3 = extractelement <4 x i32> %a, i32 3
-  %p0 = getelementptr inbounds i32, i32* %b, i32 0
-  %p1 = getelementptr inbounds i32, i32* %b, i32 1
-  %p2 = getelementptr inbounds i32, i32* %b, i32 2
-  %p3 = getelementptr inbounds i32, i32* %b, i32 3
-  %b0 = load i32, i32* %p0, align 4
-  %b1 = load i32, i32* %p1, align 4
-  %b2 = load i32, i32* %p2, align 4
-  %b3 = load i32, i32* %p3, align 4
+  %p1 = getelementptr inbounds i32, ptr %b, i32 1
+  %p2 = getelementptr inbounds i32, ptr %b, i32 2
+  %p3 = getelementptr inbounds i32, ptr %b, i32 3
+  %b0 = load i32, ptr %b, align 4
+  %b1 = load i32, ptr %p1, align 4
+  %b2 = load i32, ptr %p2, align 4
+  %b3 = load i32, ptr %p3, align 4
   %c0 = icmp uge i32 %a0, %b0
   %c1 = icmp ule i32 %b1, %a1
   %c2 = icmp ule i32 %b2, %a2
@@ -202,10 +190,9 @@ define <4 x i32> @icmp_uge_ule_v4i32(<4 x i32> %a, i32* %b) {
   ret <4 x i32> %r
 }
 
-define <4 x i32> @fcmp_ogt_olt_v4i32(<4 x float> %a, float* %b) {
+define <4 x i32> @fcmp_ogt_olt_v4i32(<4 x float> %a, ptr %b) {
 ; CHECK-LABEL: @fcmp_ogt_olt_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp olt <4 x float> [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
@@ -214,14 +201,13 @@ define <4 x i32> @fcmp_ogt_olt_v4i32(<4 x float> %a, float* %b) {
   %a1 = extractelement <4 x float> %a, i32 1
   %a2 = extractelement <4 x float> %a, i32 2
   %a3 = extractelement <4 x float> %a, i32 3
-  %p0 = getelementptr inbounds float, float* %b, i32 0
-  %p1 = getelementptr inbounds float, float* %b, i32 1
-  %p2 = getelementptr inbounds float, float* %b, i32 2
-  %p3 = getelementptr inbounds float, float* %b, i32 3
-  %b0 = load float, float* %p0, align 4
-  %b1 = load float, float* %p1, align 4
-  %b2 = load float, float* %p2, align 4
-  %b3 = load float, float* %p3, align 4
+  %p1 = getelementptr inbounds float, ptr %b, i32 1
+  %p2 = getelementptr inbounds float, ptr %b, i32 2
+  %p3 = getelementptr inbounds float, ptr %b, i32 3
+  %b0 = load float, ptr %b, align 4
+  %b1 = load float, ptr %p1, align 4
+  %b2 = load float, ptr %p2, align 4
+  %b3 = load float, ptr %p3, align 4
   %c0 = fcmp ogt float %a0, %b0
   %c1 = fcmp olt float %b1, %a1
   %c2 = fcmp olt float %b2, %a2
@@ -234,10 +220,9 @@ define <4 x i32> @fcmp_ogt_olt_v4i32(<4 x float> %a, float* %b) {
   ret <4 x i32> %r
 }
 
-define <4 x i32> @fcmp_ord_uno_v4i32(<4 x float> %a, float* %b) {
+define <4 x i32> @fcmp_ord_uno_v4i32(<4 x float> %a, ptr %b) {
 ; CHECK-LABEL: @fcmp_ord_uno_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ord <4 x float> [[TMP2]], [[A:%.*]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fcmp uno <4 x float> [[TMP2]], [[A]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
@@ -248,14 +233,13 @@ define <4 x i32> @fcmp_ord_uno_v4i32(<4 x float> %a, float* %b) {
   %a1 = extractelement <4 x float> %a, i32 1
   %a2 = extractelement <4 x float> %a, i32 2
   %a3 = extractelement <4 x float> %a, i32 3
-  %p0 = getelementptr inbounds float, float* %b, i32 0
-  %p1 = getelementptr inbounds float, float* %b, i32 1
-  %p2 = getelementptr inbounds float, float* %b, i32 2
-  %p3 = getelementptr inbounds float, float* %b, i32 3
-  %b0 = load float, float* %p0, align 4
-  %b1 = load float, float* %p1, align 4
-  %b2 = load float, float* %p2, align 4
-  %b3 = load float, float* %p3, align 4
+  %p1 = getelementptr inbounds float, ptr %b, i32 1
+  %p2 = getelementptr inbounds float, ptr %b, i32 2
+  %p3 = getelementptr inbounds float, ptr %b, i32 3
+  %b0 = load float, ptr %b, align 4
+  %b1 = load float, ptr %p1, align 4
+  %b2 = load float, ptr %p2, align 4
+  %b3 = load float, ptr %p3, align 4
   %c0 = fcmp ord float %a0, %b0
   %c1 = fcmp uno float %b1, %a1
   %c2 = fcmp uno float %b2, %a2

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll
index 6338b151b97ae..0b9a053d49204 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll
@@ -1,37 +1,35 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=slp-vectorizer,dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
 
-; int foo(double * restrict A, double * restrict B, double G) {
+; int foo(ptr restrict A, ptr restrict B, double G) {
 ;   A[0] = (B[10] ? G : 1);
 ;   A[1] = (B[11] ? G : 1);
 ; }
 
-define i32 @foo(double* noalias nocapture %A, double* noalias nocapture %B, double %G) {
+define i32 @foo(ptr noalias nocapture %A, ptr noalias nocapture %B, double %G) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 10
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 10
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = fcmp une <2 x double> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[G:%.*]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = select <2 x i1> [[TMP2]], <2 x double> [[SHUFFLE]], <2 x double> <double 1.000000e+00, double 1.000000e+00>
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP4]], ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    ret i32 undef
 ;
 entry:
-  %arrayidx = getelementptr inbounds double, double* %B, i64 10
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %B, i64 10
+  %0 = load double, ptr %arrayidx, align 8
   %tobool = fcmp une double %0, 0.000000e+00
   %cond = select i1 %tobool, double %G, double 1.000000e+00
-  store double %cond, double* %A, align 8
-  %arrayidx2 = getelementptr inbounds double, double* %B, i64 11
-  %1 = load double, double* %arrayidx2, align 8
+  store double %cond, ptr %A, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %B, i64 11
+  %1 = load double, ptr %arrayidx2, align 8
   %tobool3 = fcmp une double %1, 0.000000e+00
   %cond7 = select i1 %tobool3, double %G, double 1.000000e+00
-  %arrayidx8 = getelementptr inbounds double, double* %A, i64 1
-  store double %cond7, double* %arrayidx8, align 8
+  %arrayidx8 = getelementptr inbounds double, ptr %A, i64 1
+  store double %cond7, ptr %arrayidx8, align 8
   ret i32 undef
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll
index abe08f7c693bb..2d6ea13b1dd6f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll
@@ -22,7 +22,7 @@ define void @splat(i8 %a, i8 %b, i8 %c) {
 ; SSE-NEXT:    [[TMP3:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0
 ; SSE-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> zeroinitializer
 ; SSE-NEXT:    [[TMP4:%.*]] = xor <16 x i8> [[SHUFFLE]], [[SHUFFLE1]]
-; SSE-NEXT:    store <16 x i8> [[TMP4]], <16 x i8>* bitcast ([32 x i8]* @cle to <16 x i8>*), align 16
+; SSE-NEXT:    store <16 x i8> [[TMP4]], ptr @cle, align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @splat(
@@ -32,41 +32,41 @@ define void @splat(i8 %a, i8 %b, i8 %c) {
 ; AVX-NEXT:    [[TMP3:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0
 ; AVX-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> zeroinitializer
 ; AVX-NEXT:    [[TMP4:%.*]] = xor <16 x i8> [[SHUFFLE]], [[SHUFFLE1]]
-; AVX-NEXT:    store <16 x i8> [[TMP4]], <16 x i8>* bitcast ([32 x i8]* @cle to <16 x i8>*), align 16
+; AVX-NEXT:    store <16 x i8> [[TMP4]], ptr @cle, align 16
 ; AVX-NEXT:    ret void
 ;
   %1 = xor i8 %c, %a
-  store i8 %1, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 0), align 16
+  store i8 %1, ptr @cle, align 16
   %2 = xor i8 %a, %c
-  store i8 %2, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 1)
+  store i8 %2, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 1)
   %3 = xor i8 %a, %c
-  store i8 %3, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 2)
+  store i8 %3, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 2)
   %4 = xor i8 %a, %c
-  store i8 %4, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 3)
+  store i8 %4, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 3)
   %5 = xor i8 %c, %a
-  store i8 %5, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 4)
+  store i8 %5, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 4)
   %6 = xor i8 %c, %b
-  store i8 %6, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 5)
+  store i8 %6, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 5)
   %7 = xor i8 %c, %a
-  store i8 %7, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 6)
+  store i8 %7, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 6)
   %8 = xor i8 %c, %b
-  store i8 %8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 7)
+  store i8 %8, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 7)
   %9 = xor i8 %a, %c
-  store i8 %9, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 8)
+  store i8 %9, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 8)
   %10 = xor i8 %a, %c
-  store i8 %10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 9)
+  store i8 %10, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 9)
   %11 = xor i8 %a, %c
-  store i8 %11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 10)
+  store i8 %11, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 10)
   %12 = xor i8 %a, %c
-  store i8 %12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 11)
+  store i8 %12, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 11)
   %13 = xor i8 %a, %c
-  store i8 %13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 12)
+  store i8 %13, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 12)
   %14 = xor i8 %a, %c
-  store i8 %14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 13)
+  store i8 %14, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 13)
   %15 = xor i8 %a, %c
-  store i8 %15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 14)
+  store i8 %15, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 14)
   %16 = xor i8 %a, %c
-  store i8 %16, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 15)
+  store i8 %16, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 15)
   ret void
 }
 
@@ -80,13 +80,13 @@ define void @same_opcode_on_one_side(i32 %a, i32 %b, i32 %c) {
 ; SSE-NEXT:    [[ADD3:%.*]] = add i32 [[A]], [[C]]
 ; SSE-NEXT:    [[ADD4:%.*]] = add i32 [[C]], [[A]]
 ; SSE-NEXT:    [[TMP1:%.*]] = xor i32 [[ADD1]], [[A]]
-; SSE-NEXT:    store i32 [[TMP1]], i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 0), align 16
+; SSE-NEXT:    store i32 [[TMP1]], ptr @cle32, align 16
 ; SSE-NEXT:    [[TMP2:%.*]] = xor i32 [[B:%.*]], [[ADD2]]
-; SSE-NEXT:    store i32 [[TMP2]], i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 1), align 4
+; SSE-NEXT:    store i32 [[TMP2]], ptr getelementptr inbounds ([32 x i32], ptr @cle32, i64 0, i64 1), align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = xor i32 [[C]], [[ADD3]]
-; SSE-NEXT:    store i32 [[TMP3]], i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 2), align 4
+; SSE-NEXT:    store i32 [[TMP3]], ptr getelementptr inbounds ([32 x i32], ptr @cle32, i64 0, i64 2), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = xor i32 [[A]], [[ADD4]]
-; SSE-NEXT:    store i32 [[TMP4]], i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 3), align 4
+; SSE-NEXT:    store i32 [[TMP4]], ptr getelementptr inbounds ([32 x i32], ptr @cle32, i64 0, i64 3), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @same_opcode_on_one_side(
@@ -99,7 +99,7 @@ define void @same_opcode_on_one_side(i32 %a, i32 %b, i32 %c) {
 ; AVX-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[C]], i32 2
 ; AVX-NEXT:    [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
 ; AVX-NEXT:    [[TMP6:%.*]] = xor <4 x i32> [[TMP3]], [[SHUFFLE2]]
-; AVX-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast ([32 x i32]* @cle32 to <4 x i32>*), align 16
+; AVX-NEXT:    store <4 x i32> [[TMP6]], ptr @cle32, align 16
 ; AVX-NEXT:    ret void
 ;
   %add1 = add i32 %c, %a
@@ -107,12 +107,12 @@ define void @same_opcode_on_one_side(i32 %a, i32 %b, i32 %c) {
   %add3 = add i32 %a, %c
   %add4 = add i32 %c, %a
   %1 = xor i32 %add1, %a
-  store i32 %1, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 0), align 16
+  store i32 %1, ptr @cle32, align 16
   %2 = xor i32 %b, %add2
-  store i32 %2, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 1)
+  store i32 %2, ptr getelementptr inbounds ([32 x i32], ptr @cle32, i64 0, i64 1)
   %3 = xor i32 %c, %add3
-  store i32 %3, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 2)
+  store i32 %3, ptr getelementptr inbounds ([32 x i32], ptr @cle32, i64 0, i64 2)
   %4 = xor i32 %a, %add4
-  store i32 %4, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 3)
+  store i32 %4, ptr getelementptr inbounds ([32 x i32], ptr @cle32, i64 0, i64 3)
   ret void
 }

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll b/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
index 5fc0b1e9f9203..2b27a7aed1be2 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-apple-macosx10.7.0"
 
 @.str = private unnamed_addr constant [6 x i8] c"bingo\00", align 1
 
-define void @reduce_compare(double* nocapture %A, i32 %n) {
+define void @reduce_compare(ptr nocapture %A, i32 %n) {
 ; CHECK-LABEL: @reduce_compare(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[N:%.*]] to double
@@ -16,9 +16,8 @@ define void @reduce_compare(double* nocapture %A, i32 %n) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[SHUFFLE]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], <double 7.000000e+00, double 4.000000e+00>
 ; CHECK-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP5]], <double 5.000000e+00, double 9.000000e+00>
@@ -27,7 +26,7 @@ define void @reduce_compare(double* nocapture %A, i32 %n) {
 ; CHECK-NEXT:    [[CMP11:%.*]] = fcmp ogt double [[TMP7]], [[TMP8]]
 ; CHECK-NEXT:    br i1 [[CMP11]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0))
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 (ptr, ...) @printf(ptr @.str)
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
@@ -44,14 +43,14 @@ entry:
 for.body:                                         ; preds = %for.inc, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
   %0 = shl nsw i64 %indvars.iv, 1
-  %arrayidx = getelementptr inbounds double, double* %A, i64 %0
-  %1 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %A, i64 %0
+  %1 = load double, ptr %arrayidx, align 8
   %mul1 = fmul double %conv, %1
   %mul2 = fmul double %mul1, 7.000000e+00
   %add = fadd double %mul2, 5.000000e+00
   %2 = or i64 %0, 1
-  %arrayidx6 = getelementptr inbounds double, double* %A, i64 %2
-  %3 = load double, double* %arrayidx6, align 8
+  %arrayidx6 = getelementptr inbounds double, ptr %A, i64 %2
+  %3 = load double, ptr %arrayidx6, align 8
   %mul8 = fmul double %conv, %3
   %mul9 = fmul double %mul8, 4.000000e+00
   %add10 = fadd double %mul9, 9.000000e+00
@@ -59,7 +58,7 @@ for.body:                                         ; preds = %for.inc, %entry
   br i1 %cmp11, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
-  %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0))
+  %call = tail call i32 (ptr, ...) @printf(ptr @.str)
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body, %if.then
@@ -72,7 +71,7 @@ for.end:                                          ; preds = %for.inc
   ret void
 }
 
-declare i32 @printf(i8* nocapture, ...)
+declare i32 @printf(ptr nocapture, ...)
 
 ; PR41312 - the order of the reduction ops should not prevent forming a reduction.
 ; The 'wrong' member of the reduction requires a greater cost if grouped with the
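
(The hunk context clips the comment above.) The gist of PR41312 is that a
horizontal reduction should still form when one member of the chain appears
in a cost-unfriendly operand order. A hedged, self-contained sketch of such
a chain (names hypothetical, not taken from the test):

define float @reduction_order_sketch(ptr %p) {
  %g1 = getelementptr inbounds float, ptr %p, i64 1
  %g2 = getelementptr inbounds float, ptr %p, i64 2
  %g3 = getelementptr inbounds float, ptr %p, i64 3
  %l0 = load float, ptr %p, align 4
  %l1 = load float, ptr %g1, align 4
  %l2 = load float, ptr %g2, align 4
  %l3 = load float, ptr %g3, align 4
  ; the first and last links take their operands in swapped order; the
  ; reduction matcher should accept the chain regardless
  %add1 = fadd fast float %l1, %l0
  %add2 = fadd fast float %add1, %l2
  %add3 = fadd fast float %l3, %add2
  ret float %add3
}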

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll b/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll
index 7419c70a188bd..63eb5a4e07ba1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll
@@ -13,55 +13,52 @@ define void @foo_3double(i32 %u) #0 {
 ; CHECK-LABEL: @foo_3double(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[U_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[U:%.*]], i32* [[U_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[U:%.*]], ptr [[U_ADDR]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[U]], 3
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[MUL]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM]]
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[ARRAYIDX4]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2000 x double], ptr @A, i32 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [2000 x double], ptr @B, i32 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP4]], ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[ADD24:%.*]] = add nsw i32 [[MUL]], 2
 ; CHECK-NEXT:    [[IDXPROM25:%.*]] = sext i32 [[ADD24]] to i64
-; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM25]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load double, double* [[ARRAYIDX26]], align 8
-; CHECK-NEXT:    [[ARRAYIDX30:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM25]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load double, double* [[ARRAYIDX30]], align 8
+; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [2000 x double], ptr @A, i32 0, i64 [[IDXPROM25]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load double, ptr [[ARRAYIDX26]], align 8
+; CHECK-NEXT:    [[ARRAYIDX30:%.*]] = getelementptr inbounds [2000 x double], ptr @B, i32 0, i64 [[IDXPROM25]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load double, ptr [[ARRAYIDX30]], align 8
 ; CHECK-NEXT:    [[ADD31:%.*]] = fadd double [[TMP6]], [[TMP7]]
-; CHECK-NEXT:    store double [[ADD31]], double* [[ARRAYIDX26]], align 8
+; CHECK-NEXT:    store double [[ADD31]], ptr [[ARRAYIDX26]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
   %u.addr = alloca i32, align 4
-  store i32 %u, i32* %u.addr, align 4
+  store i32 %u, ptr %u.addr, align 4
   %mul = mul nsw i32 %u, 3
   %idxprom = sext i32 %mul to i64
-  %arrayidx = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 %idxprom
-  %0 = load double, double* %arrayidx, align 8
-  %arrayidx4 = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 %idxprom
-  %1 = load double, double* %arrayidx4, align 8
+  %arrayidx = getelementptr inbounds [2000 x double], ptr @A, i32 0, i64 %idxprom
+  %0 = load double, ptr %arrayidx, align 8
+  %arrayidx4 = getelementptr inbounds [2000 x double], ptr @B, i32 0, i64 %idxprom
+  %1 = load double, ptr %arrayidx4, align 8
   %add5 = fadd double %0, %1
-  store double %add5, double* %arrayidx, align 8
+  store double %add5, ptr %arrayidx, align 8
   %add11 = add nsw i32 %mul, 1
   %idxprom12 = sext i32 %add11 to i64
-  %arrayidx13 = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 %idxprom12
-  %2 = load double, double* %arrayidx13, align 8
-  %arrayidx17 = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 %idxprom12
-  %3 = load double, double* %arrayidx17, align 8
+  %arrayidx13 = getelementptr inbounds [2000 x double], ptr @A, i32 0, i64 %idxprom12
+  %2 = load double, ptr %arrayidx13, align 8
+  %arrayidx17 = getelementptr inbounds [2000 x double], ptr @B, i32 0, i64 %idxprom12
+  %3 = load double, ptr %arrayidx17, align 8
   %add18 = fadd double %2, %3
-  store double %add18, double* %arrayidx13, align 8
+  store double %add18, ptr %arrayidx13, align 8
   %add24 = add nsw i32 %mul, 2
   %idxprom25 = sext i32 %add24 to i64
-  %arrayidx26 = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 %idxprom25
-  %4 = load double, double* %arrayidx26, align 8
-  %arrayidx30 = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 %idxprom25
-  %5 = load double, double* %arrayidx30, align 8
+  %arrayidx26 = getelementptr inbounds [2000 x double], ptr @A, i32 0, i64 %idxprom25
+  %4 = load double, ptr %arrayidx26, align 8
+  %arrayidx30 = getelementptr inbounds [2000 x double], ptr @B, i32 0, i64 %idxprom25
+  %5 = load double, ptr %arrayidx30, align 8
   %add31 = fadd double %4, %5
-  store double %add31, double* %arrayidx26, align 8
+  store double %add31, ptr %arrayidx26, align 8
   ret void
 }
 
@@ -73,39 +70,36 @@ define void @foo_2double(i32 %u) #0 {
 ; CHECK-LABEL: @foo_2double(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[U_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[U:%.*]], i32* [[U_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[U:%.*]], ptr [[U_ADDR]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[U]], 2
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[MUL]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM]]
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[ARRAYIDX4]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2000 x double], ptr @A, i32 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [2000 x double], ptr @B, i32 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP4]], ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
   %u.addr = alloca i32, align 4
-  store i32 %u, i32* %u.addr, align 4
+  store i32 %u, ptr %u.addr, align 4
   %mul = mul nsw i32 %u, 2
   %idxprom = sext i32 %mul to i64
-  %arrayidx = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 %idxprom
-  %0 = load double, double* %arrayidx, align 8
-  %arrayidx4 = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 %idxprom
-  %1 = load double, double* %arrayidx4, align 8
+  %arrayidx = getelementptr inbounds [2000 x double], ptr @A, i32 0, i64 %idxprom
+  %0 = load double, ptr %arrayidx, align 8
+  %arrayidx4 = getelementptr inbounds [2000 x double], ptr @B, i32 0, i64 %idxprom
+  %1 = load double, ptr %arrayidx4, align 8
   %add5 = fadd double %0, %1
-  store double %add5, double* %arrayidx, align 8
+  store double %add5, ptr %arrayidx, align 8
   %add11 = add nsw i32 %mul, 1
   %idxprom12 = sext i32 %add11 to i64
-  %arrayidx13 = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 %idxprom12
-  %2 = load double, double* %arrayidx13, align 8
-  %arrayidx17 = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 %idxprom12
-  %3 = load double, double* %arrayidx17, align 8
+  %arrayidx13 = getelementptr inbounds [2000 x double], ptr @A, i32 0, i64 %idxprom12
+  %2 = load double, ptr %arrayidx13, align 8
+  %arrayidx17 = getelementptr inbounds [2000 x double], ptr @B, i32 0, i64 %idxprom12
+  %3 = load double, ptr %arrayidx17, align 8
   %add18 = fadd double %2, %3
-  store double %add18, double* %arrayidx13, align 8
+  store double %add18, ptr %arrayidx13, align 8
   ret void
 }
 
@@ -115,71 +109,68 @@ define void @foo_4float(i32 %u) #0 {
 ; CHECK-LABEL: @foo_4float(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[U_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[U:%.*]], i32* [[U_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[U:%.*]], ptr [[U_ADDR]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[U]], 4
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[MUL]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2000 x float], [2000 x float]* @C, i32 0, i64 [[IDXPROM]]
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [2000 x float], [2000 x float]* @D, i32 0, i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX4]] to <4 x float>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2000 x float], ptr @C, i32 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [2000 x float], ptr @D, i32 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <4 x float> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP4]], ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
   %u.addr = alloca i32, align 4
-  store i32 %u, i32* %u.addr, align 4
+  store i32 %u, ptr %u.addr, align 4
   %mul = mul nsw i32 %u, 4
   %idxprom = sext i32 %mul to i64
-  %arrayidx = getelementptr inbounds [2000 x float], [2000 x float]* @C, i32 0, i64 %idxprom
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx4 = getelementptr inbounds [2000 x float], [2000 x float]* @D, i32 0, i64 %idxprom
-  %1 = load float, float* %arrayidx4, align 4
+  %arrayidx = getelementptr inbounds [2000 x float], ptr @C, i32 0, i64 %idxprom
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx4 = getelementptr inbounds [2000 x float], ptr @D, i32 0, i64 %idxprom
+  %1 = load float, ptr %arrayidx4, align 4
   %add5 = fadd float %0, %1
-  store float %add5, float* %arrayidx, align 4
+  store float %add5, ptr %arrayidx, align 4
   %add11 = add nsw i32 %mul, 1
   %idxprom12 = sext i32 %add11 to i64
-  %arrayidx13 = getelementptr inbounds [2000 x float], [2000 x float]* @C, i32 0, i64 %idxprom12
-  %2 = load float, float* %arrayidx13, align 4
-  %arrayidx17 = getelementptr inbounds [2000 x float], [2000 x float]* @D, i32 0, i64 %idxprom12
-  %3 = load float, float* %arrayidx17, align 4
+  %arrayidx13 = getelementptr inbounds [2000 x float], ptr @C, i32 0, i64 %idxprom12
+  %2 = load float, ptr %arrayidx13, align 4
+  %arrayidx17 = getelementptr inbounds [2000 x float], ptr @D, i32 0, i64 %idxprom12
+  %3 = load float, ptr %arrayidx17, align 4
   %add18 = fadd float %2, %3
-  store float %add18, float* %arrayidx13, align 4
+  store float %add18, ptr %arrayidx13, align 4
   %add24 = add nsw i32 %mul, 2
   %idxprom25 = sext i32 %add24 to i64
-  %arrayidx26 = getelementptr inbounds [2000 x float], [2000 x float]* @C, i32 0, i64 %idxprom25
-  %4 = load float, float* %arrayidx26, align 4
-  %arrayidx30 = getelementptr inbounds [2000 x float], [2000 x float]* @D, i32 0, i64 %idxprom25
-  %5 = load float, float* %arrayidx30, align 4
+  %arrayidx26 = getelementptr inbounds [2000 x float], ptr @C, i32 0, i64 %idxprom25
+  %4 = load float, ptr %arrayidx26, align 4
+  %arrayidx30 = getelementptr inbounds [2000 x float], ptr @D, i32 0, i64 %idxprom25
+  %5 = load float, ptr %arrayidx30, align 4
   %add31 = fadd float %4, %5
-  store float %add31, float* %arrayidx26, align 4
+  store float %add31, ptr %arrayidx26, align 4
   %add37 = add nsw i32 %mul, 3
   %idxprom38 = sext i32 %add37 to i64
-  %arrayidx39 = getelementptr inbounds [2000 x float], [2000 x float]* @C, i32 0, i64 %idxprom38
-  %6 = load float, float* %arrayidx39, align 4
-  %arrayidx43 = getelementptr inbounds [2000 x float], [2000 x float]* @D, i32 0, i64 %idxprom38
-  %7 = load float, float* %arrayidx43, align 4
+  %arrayidx39 = getelementptr inbounds [2000 x float], ptr @C, i32 0, i64 %idxprom38
+  %6 = load float, ptr %arrayidx39, align 4
+  %arrayidx43 = getelementptr inbounds [2000 x float], ptr @D, i32 0, i64 %idxprom38
+  %7 = load float, ptr %arrayidx43, align 4
   %add44 = fadd float %6, %7
-  store float %add44, float* %arrayidx39, align 4
+  store float %add44, ptr %arrayidx39, align 4
   ret void
 }
 
 ; Similar to the previous tests, but now we are dealing with AddRec SCEV.
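 ; (Sketch, not in the file: with %mul = 2 * %i below, SCEV models the two
 ; load addresses roughly as {%A,+,16} and {(8 + %A),+,16}: the same 16-byte
 ; step with a constant 8-byte gap, which is what lets SLP prove the two
 ; loads consecutive across iterations.)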
 ; Function Attrs: nounwind ssp uwtable
-define i32 @foo_loop(double* %A, i32 %n) #0 {
+define i32 @foo_loop(ptr %A, i32 %n) #0 {
 ; CHECK-LABEL: @foo_loop(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca double*, align 8
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8
 ; CHECK-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[SUM:%.*]] = alloca double, align 8
 ; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store double* [[A:%.*]], double** [[A_ADDR]], align 8
-; CHECK-NEXT:    store i32 [[N:%.*]], i32* [[N_ADDR]], align 4
-; CHECK-NEXT:    store double 0.000000e+00, double* [[SUM]], align 8
-; CHECK-NEXT:    store i32 0, i32* [[I]], align 4
+; CHECK-NEXT:    store ptr [[A:%.*]], ptr [[A_ADDR]], align 8
+; CHECK-NEXT:    store i32 [[N:%.*]], ptr [[N_ADDR]], align 4
+; CHECK-NEXT:    store double 0.000000e+00, ptr [[SUM]], align 8
+; CHECK-NEXT:    store i32 0, ptr [[I]], align 4
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 0, [[N]]
 ; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
 ; CHECK:       for.body.lr.ph:
@@ -189,17 +180,16 @@ define i32 @foo_loop(double* %A, i32 %n) #0 {
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi double [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD7:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP0]], 2
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[MUL]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> <double 7.000000e+00, double 7.000000e+00>, [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
 ; CHECK-NEXT:    [[ADD6:%.*]] = fadd double [[TMP5]], [[TMP6]]
 ; CHECK-NEXT:    [[ADD7]] = fadd double [[TMP1]], [[ADD6]]
-; CHECK-NEXT:    store double [[ADD7]], double* [[SUM]], align 8
+; CHECK-NEXT:    store double [[ADD7]], ptr [[SUM]], align 8
 ; CHECK-NEXT:    [[INC]] = add nsw i32 [[TMP0]], 1
-; CHECK-NEXT:    store i32 [[INC]], i32* [[I]], align 4
+; CHECK-NEXT:    store i32 [[INC]], ptr [[I]], align 4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]]
 ; CHECK:       for.cond.for.end_crit_edge:
@@ -211,14 +201,14 @@ define i32 @foo_loop(double* %A, i32 %n) #0 {
 ; CHECK-NEXT:    ret i32 [[CONV]]
 ;
 entry:
-  %A.addr = alloca double*, align 8
+  %A.addr = alloca ptr, align 8
   %n.addr = alloca i32, align 4
   %sum = alloca double, align 8
   %i = alloca i32, align 4
-  store double* %A, double** %A.addr, align 8
-  store i32 %n, i32* %n.addr, align 4
-  store double 0.000000e+00, double* %sum, align 8
-  store i32 0, i32* %i, align 4
+  store ptr %A, ptr %A.addr, align 8
+  store i32 %n, ptr %n.addr, align 4
+  store double 0.000000e+00, ptr %sum, align 8
+  store i32 0, ptr %i, align 4
   %cmp1 = icmp slt i32 0, %n
   br i1 %cmp1, label %for.body.lr.ph, label %for.end
 
@@ -230,19 +220,19 @@ for.body:                                         ; preds = %for.body.lr.ph, %fo
   %1 = phi double [ 0.000000e+00, %for.body.lr.ph ], [ %add7, %for.body ]
   %mul = mul nsw i32 %0, 2
   %idxprom = sext i32 %mul to i64
-  %arrayidx = getelementptr inbounds double, double* %A, i64 %idxprom
-  %2 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %A, i64 %idxprom
+  %2 = load double, ptr %arrayidx, align 8
   %mul1 = fmul double 7.000000e+00, %2
   %add = add nsw i32 %mul, 1
   %idxprom3 = sext i32 %add to i64
-  %arrayidx4 = getelementptr inbounds double, double* %A, i64 %idxprom3
-  %3 = load double, double* %arrayidx4, align 8
+  %arrayidx4 = getelementptr inbounds double, ptr %A, i64 %idxprom3
+  %3 = load double, ptr %arrayidx4, align 8
   %mul5 = fmul double 7.000000e+00, %3
   %add6 = fadd double %mul1, %mul5
   %add7 = fadd double %1, %add6
-  store double %add7, double* %sum, align 8
+  store double %add7, ptr %sum, align 8
   %inc = add nsw i32 %0, 1
-  store i32 %inc, i32* %i, align 4
+  store i32 %inc, ptr %i, align 4
   %cmp = icmp slt i32 %inc, %n
   br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
 
@@ -263,41 +253,38 @@ define void @foo_2double_non_power_of_2(i32 %u) #0 {
 ; CHECK-LABEL: @foo_2double_non_power_of_2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[U_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[U:%.*]], i32* [[U_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[U:%.*]], ptr [[U_ADDR]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[U]], 6
 ; CHECK-NEXT:    [[ADD6:%.*]] = add i32 [[MUL]], 6
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[ADD6]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM]]
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[ARRAYIDX4]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2000 x double], ptr @A, i32 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [2000 x double], ptr @B, i32 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP4]], ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
   %u.addr = alloca i32, align 4
-  store i32 %u, i32* %u.addr, align 4
+  store i32 %u, ptr %u.addr, align 4
   %mul = mul i32 %u, 6
   %add6 = add i32 %mul, 6
   %idxprom = sext i32 %add6 to i64
-  %arrayidx = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 %idxprom
-  %0 = load double, double* %arrayidx, align 8
-  %arrayidx4 = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 %idxprom
-  %1 = load double, double* %arrayidx4, align 8
+  %arrayidx = getelementptr inbounds [2000 x double], ptr @A, i32 0, i64 %idxprom
+  %0 = load double, ptr %arrayidx, align 8
+  %arrayidx4 = getelementptr inbounds [2000 x double], ptr @B, i32 0, i64 %idxprom
+  %1 = load double, ptr %arrayidx4, align 8
   %add5 = fadd double %0, %1
-  store double %add5, double* %arrayidx, align 8
+  store double %add5, ptr %arrayidx, align 8
   %add7 = add i32 %mul, 7
   %idxprom12 = sext i32 %add7 to i64
-  %arrayidx13 = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 %idxprom12
-  %2 = load double, double* %arrayidx13, align 8
-  %arrayidx17 = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 %idxprom12
-  %3 = load double, double* %arrayidx17, align 8
+  %arrayidx13 = getelementptr inbounds [2000 x double], ptr @A, i32 0, i64 %idxprom12
+  %2 = load double, ptr %arrayidx13, align 8
+  %arrayidx17 = getelementptr inbounds [2000 x double], ptr @B, i32 0, i64 %idxprom12
+  %3 = load double, ptr %arrayidx17, align 8
   %add18 = fadd double %2, %3
-  store double %add18, double* %arrayidx13, align 8
+  store double %add18, ptr %arrayidx13, align 8
   ret void
 }
 
@@ -307,41 +294,38 @@ define void @foo_2double_non_power_of_2_zext(i32 %u) #0 {
 ; CHECK-LABEL: @foo_2double_non_power_of_2_zext(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[U_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[U:%.*]], i32* [[U_ADDR]], align 4
+; CHECK-NEXT:    store i32 [[U:%.*]], ptr [[U_ADDR]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[U]], 6
 ; CHECK-NEXT:    [[ADD6:%.*]] = add i32 [[MUL]], 6
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[ADD6]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 [[IDXPROM]]
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[ARRAYIDX4]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2000 x double], ptr @A, i32 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [2000 x double], ptr @B, i32 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP4]], ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
   %u.addr = alloca i32, align 4
-  store i32 %u, i32* %u.addr, align 4
+  store i32 %u, ptr %u.addr, align 4
   %mul = mul i32 %u, 6
   %add6 = add i32 %mul, 6
   %idxprom = zext i32 %add6 to i64
-  %arrayidx = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 %idxprom
-  %0 = load double, double* %arrayidx, align 8
-  %arrayidx4 = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 %idxprom
-  %1 = load double, double* %arrayidx4, align 8
+  %arrayidx = getelementptr inbounds [2000 x double], ptr @A, i32 0, i64 %idxprom
+  %0 = load double, ptr %arrayidx, align 8
+  %arrayidx4 = getelementptr inbounds [2000 x double], ptr @B, i32 0, i64 %idxprom
+  %1 = load double, ptr %arrayidx4, align 8
   %add5 = fadd double %0, %1
-  store double %add5, double* %arrayidx, align 8
+  store double %add5, ptr %arrayidx, align 8
   %add7 = add i32 %mul, 7
   %idxprom12 = zext i32 %add7 to i64
-  %arrayidx13 = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 %idxprom12
-  %2 = load double, double* %arrayidx13, align 8
-  %arrayidx17 = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 %idxprom12
-  %3 = load double, double* %arrayidx17, align 8
+  %arrayidx13 = getelementptr inbounds [2000 x double], ptr @A, i32 0, i64 %idxprom12
+  %2 = load double, ptr %arrayidx13, align 8
+  %arrayidx17 = getelementptr inbounds [2000 x double], ptr @B, i32 0, i64 %idxprom12
+  %3 = load double, ptr %arrayidx17, align 8
   %add18 = fadd double %2, %3
-  store double %add18, double* %arrayidx13, align 8
+  store double %add18, ptr %arrayidx13, align 8
   ret void
 }
 
@@ -349,17 +333,17 @@ entry:
 ; Alternatively, this is like foo_loop, but with a non-power-of-2 factor and
 ; potential wrapping (both indices wrap or both don't at the same time)
 ; Function Attrs: nounwind ssp uwtable
-define i32 @foo_loop_non_power_of_2(double* %A, i32 %n) #0 {
+define i32 @foo_loop_non_power_of_2(ptr %A, i32 %n) #0 {
 ; CHECK-LABEL: @foo_loop_non_power_of_2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca double*, align 8
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8
 ; CHECK-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[SUM:%.*]] = alloca double, align 8
 ; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store double* [[A:%.*]], double** [[A_ADDR]], align 8
-; CHECK-NEXT:    store i32 [[N:%.*]], i32* [[N_ADDR]], align 4
-; CHECK-NEXT:    store double 0.000000e+00, double* [[SUM]], align 8
-; CHECK-NEXT:    store i32 0, i32* [[I]], align 4
+; CHECK-NEXT:    store ptr [[A:%.*]], ptr [[A_ADDR]], align 8
+; CHECK-NEXT:    store i32 [[N:%.*]], ptr [[N_ADDR]], align 4
+; CHECK-NEXT:    store double 0.000000e+00, ptr [[SUM]], align 8
+; CHECK-NEXT:    store i32 0, ptr [[I]], align 4
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 0, [[N]]
 ; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
 ; CHECK:       for.body.lr.ph:
@@ -370,17 +354,16 @@ define i32 @foo_loop_non_power_of_2(double* %A, i32 %n) #0 {
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[TMP0]], 12
 ; CHECK-NEXT:    [[ADD_5:%.*]] = add i32 [[MUL]], 5
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[ADD_5]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> <double 7.000000e+00, double 7.000000e+00>, [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
 ; CHECK-NEXT:    [[ADD6:%.*]] = fadd double [[TMP5]], [[TMP6]]
 ; CHECK-NEXT:    [[ADD7]] = fadd double [[TMP1]], [[ADD6]]
-; CHECK-NEXT:    store double [[ADD7]], double* [[SUM]], align 8
+; CHECK-NEXT:    store double [[ADD7]], ptr [[SUM]], align 8
 ; CHECK-NEXT:    [[INC]] = add i32 [[TMP0]], 1
-; CHECK-NEXT:    store i32 [[INC]], i32* [[I]], align 4
+; CHECK-NEXT:    store i32 [[INC]], ptr [[I]], align 4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]]
 ; CHECK:       for.cond.for.end_crit_edge:
@@ -392,14 +375,14 @@ define i32 @foo_loop_non_power_of_2(double* %A, i32 %n) #0 {
 ; CHECK-NEXT:    ret i32 [[CONV]]
 ;
 entry:
-  %A.addr = alloca double*, align 8
+  %A.addr = alloca ptr, align 8
   %n.addr = alloca i32, align 4
   %sum = alloca double, align 8
   %i = alloca i32, align 4
-  store double* %A, double** %A.addr, align 8
-  store i32 %n, i32* %n.addr, align 4
-  store double 0.000000e+00, double* %sum, align 8
-  store i32 0, i32* %i, align 4
+  store ptr %A, ptr %A.addr, align 8
+  store i32 %n, ptr %n.addr, align 4
+  store double 0.000000e+00, ptr %sum, align 8
+  store i32 0, ptr %i, align 4
   %cmp1 = icmp slt i32 0, %n
   br i1 %cmp1, label %for.body.lr.ph, label %for.end
 
@@ -412,19 +395,19 @@ for.body:                                         ; preds = %for.body.lr.ph, %fo
   %mul = mul i32 %0, 12
   %add.5 = add i32 %mul, 5
   %idxprom = sext i32 %add.5 to i64
-  %arrayidx = getelementptr inbounds double, double* %A, i64 %idxprom
-  %2 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %A, i64 %idxprom
+  %2 = load double, ptr %arrayidx, align 8
   %mul1 = fmul double 7.000000e+00, %2
   %add.6 = add i32 %mul, 6
   %idxprom3 = sext i32 %add.6 to i64
-  %arrayidx4 = getelementptr inbounds double, double* %A, i64 %idxprom3
-  %3 = load double, double* %arrayidx4, align 8
+  %arrayidx4 = getelementptr inbounds double, ptr %A, i64 %idxprom3
+  %3 = load double, ptr %arrayidx4, align 8
   %mul5 = fmul double 7.000000e+00, %3
   %add6 = fadd double %mul1, %mul5
   %add7 = fadd double %1, %add6
-  store double %add7, double* %sum, align 8
+  store double %add7, ptr %sum, align 8
   %inc = add i32 %0, 1
-  store i32 %inc, i32* %i, align 4
+  store i32 %inc, ptr %i, align 4
   %cmp = icmp slt i32 %inc, %n
   br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
 
@@ -442,7 +425,7 @@ for.end:                                          ; preds = %for.cond.for.end_cr
 ; with !{!"clang version 7.0.0 (trunk 337339) (llvm/trunk 337344)"} and stripping off
 ; the !tbaa metadata nodes to fit the rest of the test file, where `cat main.c` is:
 ;
-;  double bar(double *a, unsigned n) {
+;  double bar(ptr a, unsigned n) {
 ;    double x = 0.0;
 ;    double y = 0.0;
 ;    for (unsigned i = 0; i < n; i += 2) {
@@ -456,7 +439,7 @@ for.end:                                          ; preds = %for.cond.for.end_cr
 ;
 ; Make sure we are able to vectorize this from now on:
 ;
-define double @bar(double* nocapture readonly %a, i32 %n) local_unnamed_addr #0 {
+define double @bar(ptr nocapture readonly %a, i32 %n) local_unnamed_addr #0 {
 ; CHECK-LABEL: @bar(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP15:%.*]] = icmp eq i32 [[N:%.*]], 0
@@ -471,9 +454,8 @@ define double @bar(double* nocapture readonly %a, i32 %n) local_unnamed_addr #0
 ; CHECK-NEXT:    [[I_018:%.*]] = phi i32 [ [[ADD5:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
 ; CHECK-NEXT:    [[TMP3:%.*]] = phi <2 x double> [ [[TMP6]], [[FOR_BODY]] ], [ zeroinitializer, [[ENTRY]] ]
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[I_018]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[TMP6]] = fadd <2 x double> [[TMP3]], [[TMP5]]
 ; CHECK-NEXT:    [[ADD5]] = add i32 [[I_018]], 2
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[ADD5]], [[N]]
@@ -494,13 +476,13 @@ for.body:                                         ; preds = %entry, %for.body
   %y.017 = phi double [ %add4, %for.body ], [ 0.000000e+00, %entry ]
   %x.016 = phi double [ %add, %for.body ], [ 0.000000e+00, %entry ]
   %idxprom = zext i32 %i.018 to i64
-  %arrayidx = getelementptr inbounds double, double* %a, i64 %idxprom
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %a, i64 %idxprom
+  %0 = load double, ptr %arrayidx, align 8
   %add = fadd double %x.016, %0
   %add1 = or i32 %i.018, 1
   %idxprom2 = zext i32 %add1 to i64
-  %arrayidx3 = getelementptr inbounds double, double* %a, i64 %idxprom2
-  %1 = load double, double* %arrayidx3, align 8
+  %arrayidx3 = getelementptr inbounds double, ptr %a, i64 %idxprom2
+  %1 = load double, ptr %arrayidx3, align 8
   %add4 = fadd double %y.017, %1
   %add5 = add i32 %i.018, 2
   %cmp = icmp ult i32 %add5, %n
@@ -513,29 +495,29 @@ for.body:                                         ; preds = %entry, %for.body
 @g1 = external global i32, align 4
 @g2 = external global i32, align 4
 
-define void @PR33958(i32** nocapture %p) {
+define void @PR33958(ptr nocapture %p) {
 ; CHECK-LABEL: @PR33958(
-; CHECK-NEXT:    store i32* @g1, i32** [[P:%.*]], align 8
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32*, i32** [[P]], i64 1
-; CHECK-NEXT:    store i32* @g2, i32** [[ARRAYIDX1]], align 8
+; CHECK-NEXT:    store ptr @g1, ptr [[P:%.*]], align 8
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds ptr, ptr [[P]], i64 1
+; CHECK-NEXT:    store ptr @g2, ptr [[ARRAYIDX1]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  store i32* @g1, i32** %p, align 8
-  %arrayidx1 = getelementptr inbounds i32*, i32** %p, i64 1
-  store i32* @g2, i32** %arrayidx1, align 8
+  store ptr @g1, ptr %p, align 8
+  %arrayidx1 = getelementptr inbounds ptr, ptr %p, i64 1
+  store ptr @g2, ptr %arrayidx1, align 8
   ret void
 }
 
-define void @store_constant_expression(i64* %p) {
+define void @store_constant_expression(ptr %p) {
 ; CHECK-LABEL: @store_constant_expression(
-; CHECK-NEXT:    store i64 ptrtoint (i32* @g1 to i64), i64* [[P:%.*]], align 8
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1
-; CHECK-NEXT:    store i64 ptrtoint (i32* @g2 to i64), i64* [[ARRAYIDX1]], align 8
+; CHECK-NEXT:    store i64 ptrtoint (ptr @g1 to i64), ptr [[P:%.*]], align 8
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 1
+; CHECK-NEXT:    store i64 ptrtoint (ptr @g2 to i64), ptr [[ARRAYIDX1]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  store i64 ptrtoint (i32* @g1 to i64), i64* %p, align 8
-  %arrayidx1 = getelementptr inbounds i64, i64* %p, i64 1
-  store i64 ptrtoint (i32* @g2 to i64), i64* %arrayidx1, align 8
+  store i64 ptrtoint (ptr @g1 to i64), ptr %p, align 8
+  %arrayidx1 = getelementptr inbounds i64, ptr %p, i64 1
+  store i64 ptrtoint (ptr @g2 to i64), ptr %arrayidx1, align 8
   ret void
 }
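
One explicit before/after for the mechanical pattern this patch applies
everywhere, since continue_vectorizing.ll below exercises it heavily: with
opaque pointers, the bitcast feeding a wide load or store is dropped and the
access uses the ptr operand directly. A minimal sketch (function name
hypothetical):

; typed pointers (old form):
;   %0 = bitcast double* %a to <2 x double>*
;   %1 = load <2 x double>, <2 x double>* %0, align 8
; opaque pointers (new form):
define <2 x double> @opaque_load_sketch(ptr %a) {
  %v = load <2 x double>, ptr %a, align 8
  ret <2 x double> %v
}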
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll b/llvm/test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll
index c1e82e3ce1853..fca26cbd1c459 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll
@@ -5,43 +5,34 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-macosx10.8.0"
 
 ; We will keep trying to vectorize the basic block even if we have already found a vectorized store.
-define void @test1(double* %a, double* %b, double* %c, double* %d) {
+define void @test1(ptr %a, ptr %b, ptr %c, ptr %d) {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[C:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[A]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast double* [[B]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP4]], ptr [[C:%.*]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr [[A]], align 8
+; CHECK-NEXT:    [[TMP9:%.*]] = load <4 x i32>, ptr [[B]], align 8
 ; CHECK-NEXT:    [[TMP10:%.*]] = mul <4 x i32> [[TMP7]], [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast double* [[D:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP10]], <4 x i32>* [[TMP11]], align 8
+; CHECK-NEXT:    store <4 x i32> [[TMP10]], ptr [[D:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %i0 = load double, double* %a, align 8
-  %i1 = load double, double* %b, align 8
+  %i0 = load double, ptr %a, align 8
+  %i1 = load double, ptr %b, align 8
   %mul = fmul double %i0, %i1
-  %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double, double* %arrayidx3, align 8
-  %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double, double* %arrayidx4, align 8
+  %arrayidx3 = getelementptr inbounds double, ptr %a, i64 1
+  %i3 = load double, ptr %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double, ptr %b, i64 1
+  %i4 = load double, ptr %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
-  store double %mul, double* %c, align 8
-  %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
-  store double %mul5, double* %arrayidx5, align 8
-  %0 = bitcast double* %a to <4 x i32>*
-  %1 = load <4 x i32>, <4 x i32>* %0, align 8
-  %2 = bitcast double* %b to <4 x i32>*
-  %3 = load <4 x i32>, <4 x i32>* %2, align 8
-  %4 = mul <4 x i32> %1, %3
-  %5 = bitcast double* %d to <4 x i32>*
-  store <4 x i32> %4, <4 x i32>* %5, align 8
+  store double %mul, ptr %c, align 8
+  %arrayidx5 = getelementptr inbounds double, ptr %c, i64 1
+  store double %mul5, ptr %arrayidx5, align 8
+  %0 = load <4 x i32>, ptr %a, align 8
+  %1 = load <4 x i32>, ptr %b, align 8
+  %2 = mul <4 x i32> %0, %1
+  store <4 x i32> %2, ptr %d, align 8
   ret void
 }
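
The next file is about scheduling around calls. The hazard, spelled out in
the comments before test7 further down, is that a possibly-faulting load
must not be hoisted above a call that may never return. A standalone sketch
of the shape involved (assuming a readonly, possibly non-returning callee,
as in the tests):

declare i64 @may_inf_loop_ro() nounwind readonly

define i64 @hoist_hazard_sketch(ptr %a) {
  %v1 = load i64, ptr %a, align 4
  call i64 @may_inf_loop_ro()
  ; SLP may sink %v1 below the call to form one <2 x i64> load, but it must
  ; not hoist %v2 above it: if the call never returns, the original program
  ; never dereferences %a2, so an early load could introduce a new fault
  %a2 = getelementptr i64, ptr %a, i32 1
  %v2 = load i64, ptr %a2, align 4
  %sum = add i64 %v1, %v2
  ret i64 %sum
}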

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/control-dependence.ll b/llvm/test/Transforms/SLPVectorizer/X86/control-dependence.ll
index 648a1df5b1cb1..178a67e35a33f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/control-dependence.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/control-dependence.ll
@@ -6,200 +6,184 @@ declare i64 @may_inf_loop_rw() nounwind
 declare i64 @may_throw() willreturn
 
 ; Base case with no interesting control dependencies
-define void @test_no_control(i64* %a, i64* %b, i64* %c) {
+define void @test_no_control(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @test_no_control(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 4
+; CHECK-NEXT:    store <2 x i64> [[TMP5]], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %v1 = load i64, i64* %a
-  %a2 = getelementptr i64, i64* %a, i32 1
-  %v2 = load i64, i64* %a2
+  %v1 = load i64, ptr %a
+  %a2 = getelementptr i64, ptr %a, i32 1
+  %v2 = load i64, ptr %a2
 
-  %c1 = load i64, i64* %c
-  %ca2 = getelementptr i64, i64* %c, i32 1
-  %c2 = load i64, i64* %ca2
+  %c1 = load i64, ptr %c
+  %ca2 = getelementptr i64, ptr %c, i32 1
+  %c2 = load i64, ptr %ca2
   %add1 = add i64 %v1, %c1
   %add2 = add i64 %v2, %c2
 
-  store i64 %add1, i64* %b
-  %b2 = getelementptr i64, i64* %b, i32 1
-  store i64 %add2, i64* %b2
+  store i64 %add1, ptr %b
+  %b2 = getelementptr i64, ptr %b, i32 1
+  store i64 %add2, ptr %b2
   ret void
 }
 
-define void @test1(i64* %a, i64* %b, i64* %c) {
+define void @test1(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @test1(
-; CHECK-NEXT:    [[C1:%.*]] = load i64, i64* [[C:%.*]], align 4
+; CHECK-NEXT:    [[C1:%.*]] = load i64, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[C2:%.*]] = call i64 @may_inf_loop_ro()
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[C2]], i32 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 4
+; CHECK-NEXT:    store <2 x i64> [[TMP5]], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %v1 = load i64, i64* %a
-  %a2 = getelementptr i64, i64* %a, i32 1
-  %v2 = load i64, i64* %a2
+  %v1 = load i64, ptr %a
+  %a2 = getelementptr i64, ptr %a, i32 1
+  %v2 = load i64, ptr %a2
 
-  %c1 = load i64, i64* %c
+  %c1 = load i64, ptr %c
   %c2 = call i64 @may_inf_loop_ro()
   %add1 = add i64 %v1, %c1
   %add2 = add i64 %v2, %c2
 
-  store i64 %add1, i64* %b
-  %b2 = getelementptr i64, i64* %b, i32 1
-  store i64 %add2, i64* %b2
+  store i64 %add1, ptr %b
+  %b2 = getelementptr i64, ptr %b, i32 1
+  store i64 %add2, ptr %b2
   ret void
 }
 
-define void @test2(i64* %a, i64* %b, i64* %c) {
+define void @test2(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @test2(
-; CHECK-NEXT:    [[C1:%.*]] = load i64, i64* [[C:%.*]], align 4
+; CHECK-NEXT:    [[C1:%.*]] = load i64, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[C2:%.*]] = call i64 @may_inf_loop_ro()
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[C2]], i32 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 4
+; CHECK-NEXT:    store <2 x i64> [[TMP5]], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %c1 = load i64, i64* %c
+  %c1 = load i64, ptr %c
   %c2 = call i64 @may_inf_loop_ro()
 
-  %v1 = load i64, i64* %a
-  %a2 = getelementptr i64, i64* %a, i32 1
-  %v2 = load i64, i64* %a2
+  %v1 = load i64, ptr %a
+  %a2 = getelementptr i64, ptr %a, i32 1
+  %v2 = load i64, ptr %a2
 
   %add1 = add i64 %v1, %c1
   %add2 = add i64 %v2, %c2
 
-  store i64 %add1, i64* %b
-  %b2 = getelementptr i64, i64* %b, i32 1
-  store i64 %add2, i64* %b2
+  store i64 %add1, ptr %b
+  %b2 = getelementptr i64, ptr %b, i32 1
+  store i64 %add2, ptr %b2
   ret void
 }
 
-define void @test3(i64* %a, i64* %b, i64* %c) {
+define void @test3(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @test3(
-; CHECK-NEXT:    [[C1:%.*]] = load i64, i64* [[C:%.*]], align 4
+; CHECK-NEXT:    [[C1:%.*]] = load i64, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[C2:%.*]] = call i64 @may_inf_loop_ro()
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[C2]], i32 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 4
+; CHECK-NEXT:    store <2 x i64> [[TMP5]], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %v1 = load i64, i64* %a
-  %c1 = load i64, i64* %c
+  %v1 = load i64, ptr %a
+  %c1 = load i64, ptr %c
   %add1 = add i64 %v1, %c1
 
-  %a2 = getelementptr i64, i64* %a, i32 1
-  %v2 = load i64, i64* %a2
+  %a2 = getelementptr i64, ptr %a, i32 1
+  %v2 = load i64, ptr %a2
   %c2 = call i64 @may_inf_loop_ro()
   %add2 = add i64 %v2, %c2
 
-  store i64 %add1, i64* %b
-  %b2 = getelementptr i64, i64* %b, i32 1
-  store i64 %add2, i64* %b2
+  store i64 %add1, ptr %b
+  %b2 = getelementptr i64, ptr %b, i32 1
+  store i64 %add2, ptr %b2
   ret void
 }
 
-define void @test4(i64* %a, i64* %b, i64* %c) {
+define void @test4(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @test4(
-; CHECK-NEXT:    [[C1:%.*]] = load i64, i64* [[C:%.*]], align 4
+; CHECK-NEXT:    [[C1:%.*]] = load i64, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[C2:%.*]] = call i64 @may_inf_loop_ro()
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[C2]], i32 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 4
+; CHECK-NEXT:    store <2 x i64> [[TMP5]], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %v1 = load i64, i64* %a
-  %c1 = load i64, i64* %c
+  %v1 = load i64, ptr %a
+  %c1 = load i64, ptr %c
   %add1 = add i64 %v1, %c1
 
   %c2 = call i64 @may_inf_loop_ro()
-  %a2 = getelementptr i64, i64* %a, i32 1
-  %v2 = load i64, i64* %a2
+  %a2 = getelementptr i64, ptr %a, i32 1
+  %v2 = load i64, ptr %a2
   %add2 = add i64 %v2, %c2
 
-  store i64 %add1, i64* %b
-  %b2 = getelementptr i64, i64* %b, i32 1
-  store i64 %add2, i64* %b2
+  store i64 %add1, ptr %b
+  %b2 = getelementptr i64, ptr %b, i32 1
+  store i64 %add2, ptr %b2
   ret void
 }
 
-define void @test5(i64* %a, i64* %b, i64* %c) {
+define void @test5(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT:    [[C2:%.*]] = call i64 @may_inf_loop_ro()
-; CHECK-NEXT:    [[C1:%.*]] = load i64, i64* [[C:%.*]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4
+; CHECK-NEXT:    [[C1:%.*]] = load i64, ptr [[C:%.*]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[C1]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[C2]], i32 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 4
+; CHECK-NEXT:    store <2 x i64> [[TMP5]], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %a2 = getelementptr i64, i64* %a, i32 1
-  %v2 = load i64, i64* %a2
+  %a2 = getelementptr i64, ptr %a, i32 1
+  %v2 = load i64, ptr %a2
   %c2 = call i64 @may_inf_loop_ro()
   %add2 = add i64 %v2, %c2
 
-  %v1 = load i64, i64* %a
-  %c1 = load i64, i64* %c
+  %v1 = load i64, ptr %a
+  %c1 = load i64, ptr %c
   %add1 = add i64 %v1, %c1
 
-  store i64 %add1, i64* %b
-  %b2 = getelementptr i64, i64* %b, i32 1
-  store i64 %add2, i64* %b2
+  store i64 %add1, ptr %b
+  %b2 = getelementptr i64, ptr %b, i32 1
+  store i64 %add2, ptr %b2
   ret void
 }
 
-define void @test6(i64* %a, i64* %b, i64* %c) {
+define void @test6(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @test6(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP3]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
+; CHECK-NEXT:    store <2 x i64> [[TMP6]], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %v1 = load i64, i64* %a
+  %v1 = load i64, ptr %a
   call i64 @may_inf_loop_ro()
-  %a2 = getelementptr i64, i64* %a, i32 1
-  %v2 = load i64, i64* %a2
+  %a2 = getelementptr i64, ptr %a, i32 1
+  %v2 = load i64, ptr %a2
 
-  %c1 = load i64, i64* %c
-  %ca2 = getelementptr i64, i64* %c, i32 1
-  %c2 = load i64, i64* %ca2
+  %c1 = load i64, ptr %c
+  %ca2 = getelementptr i64, ptr %c, i32 1
+  %c2 = load i64, ptr %ca2
   %add1 = add i64 %v1, %c1
   %add2 = add i64 %v2, %c2
 
-  store i64 %add1, i64* %b
-  %b2 = getelementptr i64, i64* %b, i32 1
-  store i64 %add2, i64* %b2
+  store i64 %add1, ptr %b
+  %b2 = getelementptr i64, ptr %b, i32 1
+  store i64 %add2, ptr %b2
   ret void
 }
 
@@ -209,180 +193,170 @@ define void @test6(i64* %a, i64* %b, i64* %c) {
 ; we'd have hoisted a potentially faulting load above a potentially infinite
 ; call and thus have introduced a possible fault into a program which didn't
 ; previously exist.
-define void @test7(i64* %a, i64* %b, i64* %c) {
+define void @test7(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @test7(
-; CHECK-NEXT:    [[A2:%.*]] = getelementptr i64, i64* [[A:%.*]], i32 1
-; CHECK-NEXT:    [[V1:%.*]] = load i64, i64* [[A]], align 4
-; CHECK-NEXT:    store i64 0, i64* [[A]], align 4
+; CHECK-NEXT:    [[A2:%.*]] = getelementptr i64, ptr [[A:%.*]], i32 1
+; CHECK-NEXT:    [[V1:%.*]] = load i64, ptr [[A]], align 4
+; CHECK-NEXT:    store i64 0, ptr [[A]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
-; CHECK-NEXT:    [[V2:%.*]] = load i64, i64* [[A2]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 4
+; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[A2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[V2]], i32 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP5]], [[TMP3]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
+; CHECK-NEXT:    store <2 x i64> [[TMP6]], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %v1 = load i64, i64* %a
-  store i64 0, i64* %a
+  %v1 = load i64, ptr %a
+  store i64 0, ptr %a
   call i64 @may_inf_loop_ro()
-  %a2 = getelementptr i64, i64* %a, i32 1
-  %v2 = load i64, i64* %a2
+  %a2 = getelementptr i64, ptr %a, i32 1
+  %v2 = load i64, ptr %a2
 
-  %c1 = load i64, i64* %c
-  %ca2 = getelementptr i64, i64* %c, i32 1
-  %c2 = load i64, i64* %ca2
+  %c1 = load i64, ptr %c
+  %ca2 = getelementptr i64, ptr %c, i32 1
+  %c2 = load i64, ptr %ca2
   %add1 = add i64 %v1, %c1
   %add2 = add i64 %v2, %c2
 
-  store i64 %add1, i64* %b
-  %b2 = getelementptr i64, i64* %b, i32 1
-  store i64 %add2, i64* %b2
+  store i64 %add1, ptr %b
+  %b2 = getelementptr i64, ptr %b, i32 1
+  store i64 %add2, ptr %b2
   ret void
 }
 
 ; Same as test7, but with a throwing call
-define void @test8(i64* %a, i64* %b, i64* %c) {
+define void @test8(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @test8(
-; CHECK-NEXT:    [[A2:%.*]] = getelementptr i64, i64* [[A:%.*]], i32 1
-; CHECK-NEXT:    [[V1:%.*]] = load i64, i64* [[A]], align 4
-; CHECK-NEXT:    store i64 0, i64* [[A]], align 4
+; CHECK-NEXT:    [[A2:%.*]] = getelementptr i64, ptr [[A:%.*]], i32 1
+; CHECK-NEXT:    [[V1:%.*]] = load i64, ptr [[A]], align 4
+; CHECK-NEXT:    store i64 0, ptr [[A]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_throw() #[[ATTR4:[0-9]+]]
-; CHECK-NEXT:    [[V2:%.*]] = load i64, i64* [[A2]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 4
+; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[A2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[V2]], i32 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP5]], [[TMP3]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
+; CHECK-NEXT:    store <2 x i64> [[TMP6]], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %v1 = load i64, i64* %a
-  store i64 0, i64* %a
+  %v1 = load i64, ptr %a
+  store i64 0, ptr %a
   call i64 @may_throw() readonly
-  %a2 = getelementptr i64, i64* %a, i32 1
-  %v2 = load i64, i64* %a2
+  %a2 = getelementptr i64, ptr %a, i32 1
+  %v2 = load i64, ptr %a2
 
-  %c1 = load i64, i64* %c
-  %ca2 = getelementptr i64, i64* %c, i32 1
-  %c2 = load i64, i64* %ca2
+  %c1 = load i64, ptr %c
+  %ca2 = getelementptr i64, ptr %c, i32 1
+  %c2 = load i64, ptr %ca2
   %add1 = add i64 %v1, %c1
   %add2 = add i64 %v2, %c2
 
-  store i64 %add1, i64* %b
-  %b2 = getelementptr i64, i64* %b, i32 1
-  store i64 %add2, i64* %b2
+  store i64 %add1, ptr %b
+  %b2 = getelementptr i64, ptr %b, i32 1
+  store i64 %add2, ptr %b2
   ret void
 }
 
 ; Same as test8, but with a readwrite may-throw call
-define void @test9(i64* %a, i64* %b, i64* %c) {
+define void @test9(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @test9(
-; CHECK-NEXT:    [[A2:%.*]] = getelementptr i64, i64* [[A:%.*]], i32 1
-; CHECK-NEXT:    [[V1:%.*]] = load i64, i64* [[A]], align 4
-; CHECK-NEXT:    store i64 0, i64* [[A]], align 4
+; CHECK-NEXT:    [[A2:%.*]] = getelementptr i64, ptr [[A:%.*]], i32 1
+; CHECK-NEXT:    [[V1:%.*]] = load i64, ptr [[A]], align 4
+; CHECK-NEXT:    store i64 0, ptr [[A]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_throw()
-; CHECK-NEXT:    [[V2:%.*]] = load i64, i64* [[A2]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 4
+; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[A2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[V2]], i32 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP5]], [[TMP3]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
+; CHECK-NEXT:    store <2 x i64> [[TMP6]], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %v1 = load i64, i64* %a
-  store i64 0, i64* %a
+  %v1 = load i64, ptr %a
+  store i64 0, ptr %a
   call i64 @may_throw()
-  %a2 = getelementptr i64, i64* %a, i32 1
-  %v2 = load i64, i64* %a2
+  %a2 = getelementptr i64, ptr %a, i32 1
+  %v2 = load i64, ptr %a2
 
-  %c1 = load i64, i64* %c
-  %ca2 = getelementptr i64, i64* %c, i32 1
-  %c2 = load i64, i64* %ca2
+  %c1 = load i64, ptr %c
+  %ca2 = getelementptr i64, ptr %c, i32 1
+  %c2 = load i64, ptr %ca2
   %add1 = add i64 %v1, %c1
   %add2 = add i64 %v2, %c2
 
-  store i64 %add1, i64* %b
-  %b2 = getelementptr i64, i64* %b, i32 1
-  store i64 %add2, i64* %b2
+  store i64 %add1, ptr %b
+  %b2 = getelementptr i64, ptr %b, i32 1
+  store i64 %add2, ptr %b2
   ret void
 }
 
 ; A variant of test7 which shows the same problem with a non-load instruction
-define void @test10(i64* %a, i64* %b, i64* %c) {
+define void @test10(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @test10(
-; CHECK-NEXT:    [[V1:%.*]] = load i64, i64* [[A:%.*]], align 4
-; CHECK-NEXT:    [[A2:%.*]] = getelementptr i64, i64* [[A]], i32 1
-; CHECK-NEXT:    [[V2:%.*]] = load i64, i64* [[A2]], align 4
+; CHECK-NEXT:    [[V1:%.*]] = load i64, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[A2:%.*]] = getelementptr i64, ptr [[A]], i32 1
+; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[A2]], align 4
 ; CHECK-NEXT:    [[U1:%.*]] = udiv i64 200, [[V1]]
-; CHECK-NEXT:    store i64 [[U1]], i64* [[A]], align 4
+; CHECK-NEXT:    store i64 [[U1]], ptr [[A]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
 ; CHECK-NEXT:    [[U2:%.*]] = udiv i64 200, [[V2]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[U1]], i32 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[U2]], i32 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP5]], [[TMP3]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
+; CHECK-NEXT:    store <2 x i64> [[TMP6]], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %v1 = load i64, i64* %a
-  %a2 = getelementptr i64, i64* %a, i32 1
-  %v2 = load i64, i64* %a2
+  %v1 = load i64, ptr %a
+  %a2 = getelementptr i64, ptr %a, i32 1
+  %v2 = load i64, ptr %a2
 
   %u1 = udiv i64 200, %v1
-  store i64 %u1, i64* %a
+  store i64 %u1, ptr %a
   call i64 @may_inf_loop_ro()
   %u2 = udiv i64 200, %v2
 
-  %c1 = load i64, i64* %c
-  %ca2 = getelementptr i64, i64* %c, i32 1
-  %c2 = load i64, i64* %ca2
+  %c1 = load i64, ptr %c
+  %ca2 = getelementptr i64, ptr %c, i32 1
+  %c2 = load i64, ptr %ca2
   %add1 = add i64 %u1, %c1
   %add2 = add i64 %u2, %c2
 
-  store i64 %add1, i64* %b
-  %b2 = getelementptr i64, i64* %b, i32 1
-  store i64 %add2, i64* %b2
+  store i64 %add1, ptr %b
+  %b2 = getelementptr i64, ptr %b, i32 1
+  store i64 %add2, ptr %b2
   ret void
 }
 
 ; Variant of test10 with block-invariant operands to the udivs
 ; FIXME: This is wrong; we're hoisting a potentially faulting udiv above a potentially infinite loop.
-define void @test11(i64 %x, i64 %y, i64* %b, i64* %c) {
+define void @test11(i64 %x, i64 %y, ptr %b, ptr %c) {
 ; CHECK-LABEL: @test11(
 ; CHECK-NEXT:    [[U1:%.*]] = udiv i64 200, [[X:%.*]]
-; CHECK-NEXT:    store i64 [[U1]], i64* [[B:%.*]], align 4
+; CHECK-NEXT:    store i64 [[U1]], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
 ; CHECK-NEXT:    [[U2:%.*]] = udiv i64 200, [[Y:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[U1]], i32 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[U2]], i32 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[TMP5]], [[TMP3]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[B]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
+; CHECK-NEXT:    store <2 x i64> [[TMP6]], ptr [[B]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %u1 = udiv i64 200, %x
-  store i64 %u1, i64* %b
+  store i64 %u1, ptr %b
   call i64 @may_inf_loop_ro()
   %u2 = udiv i64 200, %y
 
-  %c1 = load i64, i64* %c
-  %ca2 = getelementptr i64, i64* %c, i32 1
-  %c2 = load i64, i64* %ca2
+  %c1 = load i64, ptr %c
+  %ca2 = getelementptr i64, ptr %c, i32 1
+  %c2 = load i64, ptr %ca2
   %add1 = add i64 %u1, %c1
   %add2 = add i64 %u2, %c2
 
-  store i64 %add1, i64* %b
-  %b2 = getelementptr i64, i64* %b, i32 1
-  store i64 %add2, i64* %b2
+  store i64 %add1, ptr %b
+  %b2 = getelementptr i64, ptr %b, i32 1
+  store i64 %add2, ptr %b2
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll
index ce3d2d47e4328..607d7f7888784 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll
@@ -4,13 +4,13 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-%struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334 = type { %struct._CLzmaProps.0.27.54.81.102.123.144.165.180.195.228.258.333, i16*, i8*, i8*, i32, i32, i64, i64, i32, i32, i32, [4 x i32], i32, i32, i32, i32, i32, [20 x i8] }
+%struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334 = type { %struct._CLzmaProps.0.27.54.81.102.123.144.165.180.195.228.258.333, ptr, ptr, ptr, i32, i32, i64, i64, i32, i32, i32, [4 x i32], i32, i32, i32, i32, i32, [20 x i8] }
 %struct._CLzmaProps.0.27.54.81.102.123.144.165.180.195.228.258.333 = type { i32, i32, i32, i32 }
 
-define fastcc void @LzmaDec_DecodeReal2(%struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* %p) {
+define fastcc void @LzmaDec_DecodeReal2(ptr %p) {
 ; CHECK-LABEL: @LzmaDec_DecodeReal2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RANGE20_I:%.*]] = getelementptr inbounds [[STRUCT_CLZMADEC_1_28_55_82_103_124_145_166_181_196_229_259_334:%.*]], %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* [[P:%.*]], i64 0, i32 4
+; CHECK-NEXT:    [[RANGE20_I:%.*]] = getelementptr inbounds [[STRUCT_CLZMADEC_1_28_55_82_103_124_145_166_181_196_229_259_334:%.*]], ptr [[P:%.*]], i64 0, i32 4
 ; CHECK-NEXT:    br label [[DO_BODY66_I:%.*]]
 ; CHECK:       do.body66.i:
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ [[TMP3:%.*]], [[DO_COND_I:%.*]] ], [ undef, [[ENTRY:%.*]] ]
@@ -24,13 +24,12 @@ define fastcc void @LzmaDec_DecodeReal2(%struct.CLzmaDec.1.28.55.82.103.124.145.
 ; CHECK-NEXT:    br i1 undef, label [[DO_BODY66_I]], label [[DO_END1006_I:%.*]]
 ; CHECK:       do.end1006.i:
 ; CHECK-NEXT:    [[TMP4:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[RANGE20_I]] to <2 x i32>*
-; CHECK-NEXT:    store <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]], align 4
+; CHECK-NEXT:    store <2 x i32> [[TMP4]], ptr [[RANGE20_I]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %range20.i = getelementptr inbounds %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334, %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* %p, i64 0, i32 4
-  %code21.i = getelementptr inbounds %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334, %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* %p, i64 0, i32 5
+  %range20.i = getelementptr inbounds %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334, ptr %p, i64 0, i32 4
+  %code21.i = getelementptr inbounds %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334, ptr %p, i64 0, i32 5
   br label %do.body66.i
 
 do.body66.i:                                      ; preds = %do.cond.i, %entry
@@ -53,7 +52,7 @@ do.cond.i:                                        ; preds = %if.else.i, %do.body
 do.end1006.i:                                     ; preds = %do.cond.i
   %.range.4.i = select i1 undef, i32 undef, i32 %range.4.i
   %.code.4.i = select i1 undef, i32 undef, i32 %code.4.i
-  store i32 %.range.4.i, i32* %range20.i, align 4
-  store i32 %.code.4.i, i32* %code21.i, align 4
+  store i32 %.range.4.i, ptr %range20.i, align 4
+  store i32 %.code.4.i, ptr %code21.i, align 4
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_binaryop.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_binaryop.ll
index 0724f663afd61..dd3b6fadacc4e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_binaryop.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_binaryop.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-apple-darwin13.3.0"
 define i32 @fn1() {
 ; CHECK-LABEL: @fn1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INIT:%.*]] = load double, double* @a, align 8
+; CHECK-NEXT:    [[INIT:%.*]] = load double, ptr @a, align 8
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[PHI:%.*]] = phi double [ [[ADD2:%.*]], [[LOOP]] ], [ [[INIT]], [[ENTRY:%.*]] ]
@@ -30,7 +30,7 @@ define i32 @fn1() {
 ; CHECK-NEXT:    ret i32 1
 ;
 entry:
-  %init = load double, double* @a, align 8
+  %init = load double, ptr @a, align 8
   br label %loop
 
 loop:

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll
index 42ece665bfc6b..6f5f933761092 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll
@@ -6,27 +6,25 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960" = type { i32, i32 }
 
-define void @_ZN23btGeneric6DofConstraint8getInfo1EPN17btTypedConstraint17btConstraintInfo1E(%"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960"* nocapture %info) {
+define void @_ZN23btGeneric6DofConstraint8getInfo1EPN17btTypedConstraint17btConstraintInfo1E(ptr nocapture %info) {
 ; CHECK-LABEL: @_ZN23btGeneric6DofConstraint8getInfo1EPN17btTypedConstraint17btConstraintInfo1E(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 undef, label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.else:
-; CHECK-NEXT:    [[M_NUMCONSTRAINTROWS4:%.*]] = getelementptr inbounds %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960", %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960"* [[INFO:%.*]], i64 0, i32 0
-; CHECK-NEXT:    [[NUB5:%.*]] = getelementptr inbounds %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960", %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960"* [[INFO]], i64 0, i32 1
+; CHECK-NEXT:    [[NUB5:%.*]] = getelementptr inbounds %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960", ptr [[INFO:%.*]], i64 0, i32 1
 ; CHECK-NEXT:    br i1 undef, label [[LAND_LHS_TRUE_I_1:%.*]], label [[IF_THEN7_1:%.*]]
 ; CHECK:       land.lhs.true.i.1:
 ; CHECK-NEXT:    br i1 undef, label [[FOR_INC_1:%.*]], label [[IF_THEN7_1]]
 ; CHECK:       if.then7.1:
-; CHECK-NEXT:    store i32 1, i32* [[M_NUMCONSTRAINTROWS4]], align 4
-; CHECK-NEXT:    store i32 5, i32* [[NUB5]], align 4
+; CHECK-NEXT:    store i32 1, ptr [[INFO]], align 4
+; CHECK-NEXT:    store i32 5, ptr [[NUB5]], align 4
 ; CHECK-NEXT:    br label [[FOR_INC_1]]
 ; CHECK:       for.inc.1:
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ <i32 1, i32 5>, [[IF_THEN7_1]] ], [ <i32 0, i32 6>, [[LAND_LHS_TRUE_I_1]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i32> [[TMP0]], <i32 1, i32 -1>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[M_NUMCONSTRAINTROWS4]] to <2 x i32>*
-; CHECK-NEXT:    store <2 x i32> [[TMP1]], <2 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    store <2 x i32> [[TMP1]], ptr [[INFO]], align 4
 ; CHECK-NEXT:    unreachable
 ;
 entry:
@@ -36,8 +34,7 @@ if.then:                                          ; preds = %entry
   ret void
 
 if.else:                                          ; preds = %entry
-  %m_numConstraintRows4 = getelementptr inbounds %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960", %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960"* %info, i64 0, i32 0
-  %nub5 = getelementptr inbounds %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960", %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960"* %info, i64 0, i32 1
+  %nub5 = getelementptr inbounds %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960", ptr %info, i64 0, i32 1
   br i1 undef, label %land.lhs.true.i.1, label %if.then7.1
 
 land.lhs.true.i.1:                                ; preds = %if.else
@@ -45,18 +42,18 @@ land.lhs.true.i.1:                                ; preds = %if.else
 
 if.then7.1:                                       ; preds = %land.lhs.true.i.1, %if.else
   %inc.1 = add nsw i32 0, 1
-  store i32 %inc.1, i32* %m_numConstraintRows4, align 4
+  store i32 %inc.1, ptr %info, align 4
   %dec.1 = add nsw i32 6, -1
-  store i32 %dec.1, i32* %nub5, align 4
+  store i32 %dec.1, ptr %nub5, align 4
   br label %for.inc.1
 
 for.inc.1:                                        ; preds = %if.then7.1, %land.lhs.true.i.1
   %0 = phi i32 [ %dec.1, %if.then7.1 ], [ 6, %land.lhs.true.i.1 ]
   %1 = phi i32 [ %inc.1, %if.then7.1 ], [ 0, %land.lhs.true.i.1 ]
   %inc.2 = add nsw i32 %1, 1
-  store i32 %inc.2, i32* %m_numConstraintRows4, align 4
+  store i32 %inc.2, ptr %info, align 4
   %dec.2 = add nsw i32 %0, -1
-  store i32 %dec.2, i32* %nub5, align 4
+  store i32 %dec.2, ptr %nub5, align 4
   unreachable
 }
 
@@ -64,20 +61,19 @@ for.inc.1:                                        ; preds = %if.then7.1, %land.l
 %class.btVector3.5.30.65.90.115.140.175.185.260.280.330 = type { [4 x float] }
 %class.btVector4.7.32.67.92.117.142.177.187.262.282.331 = type { %class.btVector3.5.30.65.90.115.140.175.185.260.280.330 }
 
-define void @_ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2_S2_fR25GIM_TRIANGLE_CONTACT_DATA(%class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* %this) {
+define void @_ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2_S2_fR25GIM_TRIANGLE_CONTACT_DATA(ptr %this) {
 ; CHECK-LABEL: @_ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2_S2_fR25GIM_TRIANGLE_CONTACT_DATA(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [[CLASS_GIM_TRIANGLE_CALCULATION_CACHE_9_34_69_94_119_144_179_189_264_284_332:%.*]], %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* [[THIS:%.*]], i64 0, i32 2, i64 0, i32 0, i64 1
-; CHECK-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds [[CLASS_GIM_TRIANGLE_CALCULATION_CACHE_9_34_69_94_119_144_179_189_264_284_332]], %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* [[THIS]], i64 0, i32 2, i64 0, i32 0, i64 2
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX36]], align 4
+; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [[CLASS_GIM_TRIANGLE_CALCULATION_CACHE_9_34_69_94_119_144_179_189_264_284_332:%.*]], ptr [[THIS:%.*]], i64 0, i32 2, i64 0, i32 0, i64 1
+; CHECK-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds [[CLASS_GIM_TRIANGLE_CALCULATION_CACHE_9_34_69_94_119_144_179_189_264_284_332]], ptr [[THIS]], i64 0, i32 2, i64 0, i32 0, i64 2
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX36]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> <float undef, float poison>, float [[TMP0]], i32 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x float> [[TMP1]], undef
 ; CHECK-NEXT:    [[TMP3:%.*]] = fsub <2 x float> [[TMP2]], undef
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
-; CHECK-NEXT:    store float [[TMP4]], float* undef, align 4
+; CHECK-NEXT:    store float [[TMP4]], ptr undef, align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = fsub <2 x float> [[TMP2]], [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[ARRAYIDX26]] to <2 x float>*
-; CHECK-NEXT:    store <2 x float> [[TMP5]], <2 x float>* [[TMP6]], align 4
+; CHECK-NEXT:    store <2 x float> [[TMP5]], ptr [[ARRAYIDX26]], align 4
 ; CHECK-NEXT:    br i1 undef, label [[IF_ELSE1609:%.*]], label [[IF_THEN1595:%.*]]
 ; CHECK:       if.then1595:
 ; CHECK-NEXT:    br i1 undef, label [[RETURN:%.*]], label [[FOR_BODY_LR_PH_I_I1702:%.*]]
@@ -89,18 +85,18 @@ define void @_ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %arrayidx26 = getelementptr inbounds %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332, %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* %this, i64 0, i32 2, i64 0, i32 0, i64 1
-  %arrayidx36 = getelementptr inbounds %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332, %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* %this, i64 0, i32 2, i64 0, i32 0, i64 2
-  %0 = load float, float* %arrayidx36, align 4
+  %arrayidx26 = getelementptr inbounds %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332, ptr %this, i64 0, i32 2, i64 0, i32 0, i64 1
+  %arrayidx36 = getelementptr inbounds %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332, ptr %this, i64 0, i32 2, i64 0, i32 0, i64 2
+  %0 = load float, ptr %arrayidx36, align 4
   %add587 = fadd float undef, undef
   %sub600 = fsub float %add587, undef
-  store float %sub600, float* undef, align 4
+  store float %sub600, ptr undef, align 4
   %sub613 = fsub float %add587, %sub600
-  store float %sub613, float* %arrayidx26, align 4
+  store float %sub613, ptr %arrayidx26, align 4
   %add626 = fadd float %0, undef
   %sub639 = fsub float %add626, undef
   %sub652 = fsub float %add626, %sub639
-  store float %sub652, float* %arrayidx36, align 4
+  store float %sub652, ptr %arrayidx36, align 4
   br i1 undef, label %if.else1609, label %if.then1595
 
 if.then1595:                                      ; preds = %entry

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
index 60c2200caba48..4eb0be61b51c4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 %class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113 = type { [4 x float] }
 
 ; Function Attrs: ssp uwtable
-define void @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(%class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113* %vertices) #0 align 2 {
+define void @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(ptr %vertices) #0 align 2 {
 ; CHECK-LABEL: @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 undef, label [[RETURN:%.*]], label [[IF_END:%.*]]
@@ -42,9 +42,7 @@ define void @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(%class.
 ; CHECK:       if.end332:
 ; CHECK-NEXT:    [[TMP4:%.*]] = phi <2 x float> [ [[TMP1]], [[IF_THEN329]] ], [ [[TMP1]], [[IF_END327]] ], [ <float 0x3F847AE140000000, float 0x3F847AE140000000>, [[IF_THEN291]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = fsub <2 x float> [[TMP3]], [[TMP4]]
-; CHECK-NEXT:    [[ARRAYIDX_I_I606:%.*]] = getelementptr inbounds [[CLASS_BTVECTOR3_23_221_463_485_507_573_595_683_727_749_815_837_991_1585_1607_1629_1651_1849_2047_2069_2091_2113:%.*]], %class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113* [[VERTICES:%.*]], i64 0, i32 0, i64 0
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_I_I606]] to <2 x float>*
-; CHECK-NEXT:    store <2 x float> [[TMP5]], <2 x float>* [[TMP6]], align 4
+; CHECK-NEXT:    store <2 x float> [[TMP5]], ptr [[VERTICES:%.*]], align 4
 ; CHECK-NEXT:    br label [[RETURN]]
 ; CHECK:       return:
 ; CHECK-NEXT:    ret void
@@ -109,10 +107,9 @@ if.end332:                                        ; preds = %if.then329, %if.end
   %dy276.1 = phi float [ undef, %if.then329 ], [ undef, %if.end327 ], [ 0x3F847AE140000000, %if.then291 ]
   %sub334 = fsub float %add294, %dx272.1
   %sub338 = fsub float %add297, %dy276.1
-  %arrayidx.i.i606 = getelementptr inbounds %class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113, %class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113* %vertices, i64 0, i32 0, i64 0
-  store float %sub334, float* %arrayidx.i.i606, align 4
-  %arrayidx3.i607 = getelementptr inbounds %class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113, %class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113* %vertices, i64 0, i32 0, i64 1
-  store float %sub338, float* %arrayidx3.i607, align 4
+  store float %sub334, ptr %vertices, align 4
+  %arrayidx3.i607 = getelementptr inbounds %class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113, ptr %vertices, i64 0, i32 0, i64 1
+  store float %sub338, ptr %arrayidx3.i607, align 4
   br label %return
 
 return:                                           ; preds = %if.end332, %for.end271, %entry

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll
index d9655b7444005..88b705f14c530 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll
@@ -5,7 +5,7 @@
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.10.0"
 
-define void @testfunc(float* nocapture %dest, float* nocapture readonly %src) {
+define void @testfunc(ptr nocapture %dest, ptr nocapture readonly %src) {
 ; CHECK-LABEL: @testfunc(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
@@ -13,11 +13,11 @@ define void @testfunc(float* nocapture %dest, float* nocapture readonly %src) {
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[ACC1_056:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD13:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[DEST:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store float [[ACC1_056]], float* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[DEST:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store float [[ACC1_056]], ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x float> [[TMP0]], [[SHUFFLE]]
@@ -51,11 +51,11 @@ for.body:
   %acc1.056 = phi float [ 0.000000e+00, %entry ], [ %add13, %for.body ]
   %s1.055 = phi float [ 0.000000e+00, %entry ], [ %cond.i40, %for.body ]
   %s0.054 = phi float [ 0.000000e+00, %entry ], [ %cond.i44, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %src, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %src, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %arrayidx2 = getelementptr inbounds float, float* %dest, i64 %indvars.iv
-  store float %acc1.056, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %dest, i64 %indvars.iv
+  store float %acc1.056, ptr %arrayidx2, align 4
   %add = fadd float %s0.054, %0
   %add3 = fadd float %s1.055, %0
   %mul = fmul float %s0.054, 0.000000e+00

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll
index 78e0209610c20..af354bb06ad46 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll
@@ -3,19 +3,17 @@
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
-%"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731" = type { double*, double*, double*, double** }
+%"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731" = type { ptr, ptr, ptr, ptr }
 
 ; Function Attrs: nounwind ssp uwtable
-define void @_ZSt6uniqueISt15_Deque_iteratorIdRdPdEET_S4_S4_(%"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__first, %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* nocapture %__last) {
+define void @_ZSt6uniqueISt15_Deque_iteratorIdRdPdEET_S4_S4_(ptr %__first, ptr nocapture %__last) {
 ; CHECK-LABEL: @_ZSt6uniqueISt15_Deque_iteratorIdRdPdEET_S4_S4_(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[_M_CUR2_I_I:%.*]] = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* [[__FIRST:%.*]], i64 0, i32 0
-; CHECK-NEXT:    [[TMP0:%.*]] = load double*, double** [[_M_CUR2_I_I]], align 8
-; CHECK-NEXT:    [[_M_FIRST3_I_I:%.*]] = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* [[__FIRST]], i64 0, i32 1
-; CHECK-NEXT:    [[_M_CUR2_I_I81:%.*]] = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* [[__LAST:%.*]], i64 0, i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = load double*, double** [[_M_CUR2_I_I81]], align 8
-; CHECK-NEXT:    [[_M_FIRST3_I_I83:%.*]] = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* [[__LAST]], i64 0, i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = load double*, double** [[_M_FIRST3_I_I83]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__FIRST:%.*]], align 8
+; CHECK-NEXT:    [[_M_FIRST3_I_I:%.*]] = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", ptr [[__FIRST]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__LAST:%.*]], align 8
+; CHECK-NEXT:    [[_M_FIRST3_I_I83:%.*]] = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", ptr [[__LAST]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[_M_FIRST3_I_I83]], align 8
 ; CHECK-NEXT:    br i1 undef, label [[_ZST13ADJACENT_FINDIST15_DEQUE_ITERATORIDRDPDEET_S4_S4__EXIT:%.*]], label [[WHILE_COND_I_PREHEADER:%.*]]
 ; CHECK:       while.cond.i.preheader:
 ; CHECK-NEXT:    br label [[WHILE_COND_I:%.*]]
@@ -24,10 +22,10 @@ define void @_ZSt6uniqueISt15_Deque_iteratorIdRdPdEET_S4_S4_(%"struct.std::_Dequ
 ; CHECK:       while.body.i:
 ; CHECK-NEXT:    br i1 undef, label [[_ZST13ADJACENT_FINDIST15_DEQUE_ITERATORIDRDPDEET_S4_S4__EXIT]], label [[WHILE_COND_I]]
 ; CHECK:       _ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit:
-; CHECK-NEXT:    [[TMP3:%.*]] = phi double* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[TMP2]], [[WHILE_COND_I]] ], [ undef, [[WHILE_BODY_I]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = phi double* [ [[TMP0]], [[ENTRY]] ], [ [[TMP1]], [[WHILE_COND_I]] ], [ undef, [[WHILE_BODY_I]] ]
-; CHECK-NEXT:    store double* [[TMP4]], double** [[_M_CUR2_I_I]], align 8
-; CHECK-NEXT:    store double* [[TMP3]], double** [[_M_FIRST3_I_I]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[TMP2]], [[WHILE_COND_I]] ], [ undef, [[WHILE_BODY_I]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi ptr [ [[TMP0]], [[ENTRY]] ], [ [[TMP1]], [[WHILE_COND_I]] ], [ undef, [[WHILE_BODY_I]] ]
+; CHECK-NEXT:    store ptr [[TMP4]], ptr [[__FIRST]], align 8
+; CHECK-NEXT:    store ptr [[TMP3]], ptr [[_M_FIRST3_I_I]], align 8
 ; CHECK-NEXT:    br i1 undef, label [[IF_THEN_I55:%.*]], label [[WHILE_COND:%.*]]
 ; CHECK:       if.then.i55:
 ; CHECK-NEXT:    br label [[WHILE_COND]]
@@ -35,13 +33,11 @@ define void @_ZSt6uniqueISt15_Deque_iteratorIdRdPdEET_S4_S4_(%"struct.std::_Dequ
 ; CHECK-NEXT:    br label [[WHILE_COND]]
 ;
 entry:
-  %_M_cur2.i.i = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__first, i64 0, i32 0
-  %0 = load double*, double** %_M_cur2.i.i, align 8
-  %_M_first3.i.i = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__first, i64 0, i32 1
-  %_M_cur2.i.i81 = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__last, i64 0, i32 0
-  %1 = load double*, double** %_M_cur2.i.i81, align 8
-  %_M_first3.i.i83 = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__last, i64 0, i32 1
-  %2 = load double*, double** %_M_first3.i.i83, align 8
+  %0 = load ptr, ptr %__first, align 8
+  %_M_first3.i.i = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", ptr %__first, i64 0, i32 1
+  %1 = load ptr, ptr %__last, align 8
+  %_M_first3.i.i83 = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", ptr %__last, i64 0, i32 1
+  %2 = load ptr, ptr %_M_first3.i.i83, align 8
   br i1 undef, label %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit, label %while.cond.i.preheader
 
 while.cond.i.preheader:                           ; preds = %entry
@@ -54,10 +50,10 @@ while.body.i:                                     ; preds = %while.cond.i
   br i1 undef, label %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit, label %while.cond.i
 
 _ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit: ; preds = %while.body.i, %while.cond.i, %entry
-  %3 = phi double* [ %2, %entry ], [ %2, %while.cond.i ], [ undef, %while.body.i ]
-  %4 = phi double* [ %0, %entry ], [ %1, %while.cond.i ], [ undef, %while.body.i ]
-  store double* %4, double** %_M_cur2.i.i, align 8
-  store double* %3, double** %_M_first3.i.i, align 8
+  %3 = phi ptr [ %2, %entry ], [ %2, %while.cond.i ], [ undef, %while.body.i ]
+  %4 = phi ptr [ %0, %entry ], [ %1, %while.cond.i ], [ undef, %while.body.i ]
+  store ptr %4, ptr %__first, align 8
+  store ptr %3, ptr %_M_first3.i.i, align 8
   br i1 undef, label %if.then.i55, label %while.cond
 
 if.then.i55:                                      ; preds = %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_gep.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_gep.ll
index be9c2a2bf2629..34682ca759cd4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_gep.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_gep.ll
@@ -4,29 +4,29 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-@a = common global i64* null, align 8
+@a = common global ptr null, align 8
 
 ; Function Attrs: nounwind uwtable
 define i32 @fn1() {
 ; CHECK-LABEL: @fn1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i64*, i64** @a, align 8
-; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 1
-; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint i64* [[ADD_PTR]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 2
-; CHECK-NEXT:    store i64 [[TMP1]], i64* [[ARRAYIDX]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint i64* [[ARRAYIDX]] to i64
-; CHECK-NEXT:    store i64 [[TMP2]], i64* [[ADD_PTR]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr @a, align 8
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[ADD_PTR]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 2
+; CHECK-NEXT:    store i64 [[TMP1]], ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
+; CHECK-NEXT:    store i64 [[TMP2]], ptr [[ADD_PTR]], align 8
 ; CHECK-NEXT:    ret i32 undef
 ;
 entry:
-  %0 = load i64*, i64** @a, align 8
-  %add.ptr = getelementptr inbounds i64, i64* %0, i64 1
-  %1 = ptrtoint i64* %add.ptr to i64
-  %arrayidx = getelementptr inbounds i64, i64* %0, i64 2
-  store i64 %1, i64* %arrayidx, align 8
-  %2 = ptrtoint i64* %arrayidx to i64
-  store i64 %2, i64* %add.ptr, align 8
+  %0 = load ptr, ptr @a, align 8
+  %add.ptr = getelementptr inbounds i64, ptr %0, i64 1
+  %1 = ptrtoint ptr %add.ptr to i64
+  %arrayidx = getelementptr inbounds i64, ptr %0, i64 2
+  store i64 %1, ptr %arrayidx, align 8
+  %2 = ptrtoint ptr %arrayidx to i64
+  store i64 %2, ptr %add.ptr, align 8
   ret i32 undef
 }
 
@@ -43,10 +43,10 @@ entry:
   br label %body
 
 body:
-  %p.1.i19 = phi i8* [ undef, %entry ], [ %incdec.ptr.i.7, %body ]
-  %lsr.iv17 = phi i8* [ undef, %entry ], [ %scevgep113.7, %body ]
-  %incdec.ptr.i.7 = getelementptr inbounds i8, i8* undef, i32 1
-  %scevgep113.7 = getelementptr i8, i8* undef, i64 1
+  %p.1.i19 = phi ptr [ undef, %entry ], [ %incdec.ptr.i.7, %body ]
+  %lsr.iv17 = phi ptr [ undef, %entry ], [ %scevgep113.7, %body ]
+  %incdec.ptr.i.7 = getelementptr inbounds i8, ptr undef, i32 1
+  %scevgep113.7 = getelementptr i8, ptr undef, i64 1
   br label %body
 
 epilog:

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod-inseltpoison.ll
index 4ffd022dce180..512b0ab45a2be 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod-inseltpoison.ll
@@ -100,49 +100,47 @@ if.end103:                                        ; preds = %if.else, %if.then88
 }
 
 
-define void @intrapred_luma([13 x i16]* %ptr) {
+define void @intrapred_luma(ptr %ptr) {
 ; CHECK-LABEL: @intrapred_luma(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CONV153:%.*]] = trunc i32 3 to i16
-; CHECK-NEXT:    [[ARRAYIDX154:%.*]] = getelementptr inbounds [13 x i16], [13 x i16]* [[PTR:%.*]], i64 0, i64 12
-; CHECK-NEXT:    store i16 [[CONV153]], i16* [[ARRAYIDX154]], align 8
-; CHECK-NEXT:    [[ARRAYIDX155:%.*]] = getelementptr inbounds [13 x i16], [13 x i16]* [[PTR]], i64 0, i64 11
-; CHECK-NEXT:    store i16 [[CONV153]], i16* [[ARRAYIDX155]], align 2
-; CHECK-NEXT:    [[ARRAYIDX156:%.*]] = getelementptr inbounds [13 x i16], [13 x i16]* [[PTR]], i64 0, i64 10
-; CHECK-NEXT:    store i16 [[CONV153]], i16* [[ARRAYIDX156]], align 4
+; CHECK-NEXT:    [[ARRAYIDX154:%.*]] = getelementptr inbounds [13 x i16], ptr [[PTR:%.*]], i64 0, i64 12
+; CHECK-NEXT:    store i16 [[CONV153]], ptr [[ARRAYIDX154]], align 8
+; CHECK-NEXT:    [[ARRAYIDX155:%.*]] = getelementptr inbounds [13 x i16], ptr [[PTR]], i64 0, i64 11
+; CHECK-NEXT:    store i16 [[CONV153]], ptr [[ARRAYIDX155]], align 2
+; CHECK-NEXT:    [[ARRAYIDX156:%.*]] = getelementptr inbounds [13 x i16], ptr [[PTR]], i64 0, i64 10
+; CHECK-NEXT:    store i16 [[CONV153]], ptr [[ARRAYIDX156]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
   %conv153 = trunc i32 3 to i16
-  %arrayidx154 = getelementptr inbounds [13 x i16], [13 x i16]* %ptr, i64 0, i64 12
-  store i16 %conv153, i16* %arrayidx154, align 8
-  %arrayidx155 = getelementptr inbounds [13 x i16], [13 x i16]* %ptr, i64 0, i64 11
-  store i16 %conv153, i16* %arrayidx155, align 2
-  %arrayidx156 = getelementptr inbounds [13 x i16], [13 x i16]* %ptr, i64 0, i64 10
-  store i16 %conv153, i16* %arrayidx156, align 4
+  %arrayidx154 = getelementptr inbounds [13 x i16], ptr %ptr, i64 0, i64 12
+  store i16 %conv153, ptr %arrayidx154, align 8
+  %arrayidx155 = getelementptr inbounds [13 x i16], ptr %ptr, i64 0, i64 11
+  store i16 %conv153, ptr %arrayidx155, align 2
+  %arrayidx156 = getelementptr inbounds [13 x i16], ptr %ptr, i64 0, i64 10
+  store i16 %conv153, ptr %arrayidx156, align 4
   ret void
 }
 
-define fastcc void @dct36(double* %inbuf) {
+define fastcc void @dct36(ptr %inbuf) {
 ; CHECK-LABEL: @dct36(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds double, double* [[INBUF:%.*]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[INBUF]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds double, ptr [[INBUF:%.*]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[INBUF]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 undef>
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[ARRAYIDX44]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %arrayidx41 = getelementptr inbounds double, double* %inbuf, i64 2
-  %arrayidx44 = getelementptr inbounds double, double* %inbuf, i64 1
-  %0 = load double, double* %arrayidx44, align 8
+  %arrayidx41 = getelementptr inbounds double, ptr %inbuf, i64 2
+  %arrayidx44 = getelementptr inbounds double, ptr %inbuf, i64 1
+  %0 = load double, ptr %arrayidx44, align 8
   %add46 = fadd double %0, poison
-  store double %add46, double* %arrayidx41, align 8
-  %1 = load double, double* %inbuf, align 8
+  store double %add46, ptr %arrayidx41, align 8
+  %1 = load double, ptr %inbuf, align 8
   %add49 = fadd double %1, %0
-  store double %add49, double* %arrayidx44, align 8
+  store double %add49, ptr %arrayidx44, align 8
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
index b0a919f3a9bf0..8f693c8331161 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
@@ -104,45 +104,43 @@ define void @intrapred_luma() {
 ; CHECK-LABEL: @intrapred_luma(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CONV153:%.*]] = trunc i32 undef to i16
-; CHECK-NEXT:    [[ARRAYIDX154:%.*]] = getelementptr inbounds [13 x i16], [13 x i16]* undef, i64 0, i64 12
-; CHECK-NEXT:    store i16 [[CONV153]], i16* [[ARRAYIDX154]], align 8
-; CHECK-NEXT:    [[ARRAYIDX155:%.*]] = getelementptr inbounds [13 x i16], [13 x i16]* undef, i64 0, i64 11
-; CHECK-NEXT:    store i16 [[CONV153]], i16* [[ARRAYIDX155]], align 2
-; CHECK-NEXT:    [[ARRAYIDX156:%.*]] = getelementptr inbounds [13 x i16], [13 x i16]* undef, i64 0, i64 10
-; CHECK-NEXT:    store i16 [[CONV153]], i16* [[ARRAYIDX156]], align 4
+; CHECK-NEXT:    [[ARRAYIDX154:%.*]] = getelementptr inbounds [13 x i16], ptr undef, i64 0, i64 12
+; CHECK-NEXT:    store i16 [[CONV153]], ptr [[ARRAYIDX154]], align 8
+; CHECK-NEXT:    [[ARRAYIDX155:%.*]] = getelementptr inbounds [13 x i16], ptr undef, i64 0, i64 11
+; CHECK-NEXT:    store i16 [[CONV153]], ptr [[ARRAYIDX155]], align 2
+; CHECK-NEXT:    [[ARRAYIDX156:%.*]] = getelementptr inbounds [13 x i16], ptr undef, i64 0, i64 10
+; CHECK-NEXT:    store i16 [[CONV153]], ptr [[ARRAYIDX156]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
   %conv153 = trunc i32 undef to i16
-  %arrayidx154 = getelementptr inbounds [13 x i16], [13 x i16]* undef, i64 0, i64 12
-  store i16 %conv153, i16* %arrayidx154, align 8
-  %arrayidx155 = getelementptr inbounds [13 x i16], [13 x i16]* undef, i64 0, i64 11
-  store i16 %conv153, i16* %arrayidx155, align 2
-  %arrayidx156 = getelementptr inbounds [13 x i16], [13 x i16]* undef, i64 0, i64 10
-  store i16 %conv153, i16* %arrayidx156, align 4
+  %arrayidx154 = getelementptr inbounds [13 x i16], ptr undef, i64 0, i64 12
+  store i16 %conv153, ptr %arrayidx154, align 8
+  %arrayidx155 = getelementptr inbounds [13 x i16], ptr undef, i64 0, i64 11
+  store i16 %conv153, ptr %arrayidx155, align 2
+  %arrayidx156 = getelementptr inbounds [13 x i16], ptr undef, i64 0, i64 10
+  store i16 %conv153, ptr %arrayidx156, align 4
   ret void
 }
 
-define fastcc void @dct36(double* %inbuf) {
+define fastcc void @dct36(ptr %inbuf) {
 ; CHECK-LABEL: @dct36(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds double, double* [[INBUF:%.*]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[INBUF]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds double, ptr [[INBUF:%.*]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[INBUF]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 undef>
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[ARRAYIDX44]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %arrayidx41 = getelementptr inbounds double, double* %inbuf, i64 2
-  %arrayidx44 = getelementptr inbounds double, double* %inbuf, i64 1
-  %0 = load double, double* %arrayidx44, align 8
+  %arrayidx41 = getelementptr inbounds double, ptr %inbuf, i64 2
+  %arrayidx44 = getelementptr inbounds double, ptr %inbuf, i64 1
+  %0 = load double, ptr %arrayidx44, align 8
   %add46 = fadd double %0, undef
-  store double %add46, double* %arrayidx41, align 8
-  %1 = load double, double* %inbuf, align 8
+  store double %add46, ptr %arrayidx41, align 8
+  %1 = load double, ptr %inbuf, align 8
   %add49 = fadd double %1, %0
-  store double %add49, double* %arrayidx44, align 8
+  store double %add49, ptr %arrayidx44, align 8
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
index 19123d50285cc..620d589406387 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
@@ -22,12 +22,12 @@ define void @_foo(double %p1, double %p2, double %p3) #0 {
 ; CHECK-NEXT:    [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]])
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
-; CHECK-NEXT:    store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
+; CHECK-NEXT:    store i32 [[X13]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> <i32 1, i32 undef>
 ; CHECK-NEXT:    [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]])
-; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
-; CHECK-NEXT:    store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
+; CHECK-NEXT:    store i32 [[X14]], ptr [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256
@@ -56,12 +56,12 @@ for.body:
   %p3.addr.0258 = phi double [ %add, %bb1 ], [ %add28, %for.body ]
   %vecinit.i.i237 = insertelement <2 x double> poison, double %t.0259, i32 0
   %x13 = tail call i32 @_xfn(<2 x double> %vecinit.i.i237) #2
-  %arrayidx = getelementptr inbounds [256 x i32], [256 x i32]* %tab1, i64 0, i64 %indvars.iv266
-  store i32 %x13, i32* %arrayidx, align 4, !tbaa !4
+  %arrayidx = getelementptr inbounds [256 x i32], ptr %tab1, i64 0, i64 %indvars.iv266
+  store i32 %x13, ptr %arrayidx, align 4, !tbaa !4
   %vecinit.i.i = insertelement <2 x double> poison, double %p3.addr.0258, i32 0
   %x14 = tail call i32 @_xfn(<2 x double> %vecinit.i.i) #2
-  %arrayidx26 = getelementptr inbounds [256 x i32], [256 x i32]* %tab2, i64 0, i64 %indvars.iv266
-  store i32 %x14, i32* %arrayidx26, align 4, !tbaa !4
+  %arrayidx26 = getelementptr inbounds [256 x i32], ptr %tab2, i64 0, i64 %indvars.iv266
+  store i32 %x14, ptr %arrayidx26, align 4, !tbaa !4
   %add27 = fadd double %mul19, %t.0259
   %add28 = fadd double %mul21, %p3.addr.0258
   %indvars.iv.next267 = add nuw nsw i64 %indvars.iv266, 1

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
index 9b73a5b2e7376..13eaeda3e65dc 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
@@ -22,12 +22,12 @@ define void @_foo(double %p1, double %p2, double %p3) #0 {
 ; CHECK-NEXT:    [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]])
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
-; CHECK-NEXT:    store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
+; CHECK-NEXT:    store i32 [[X13]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> <i32 1, i32 undef>
 ; CHECK-NEXT:    [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]])
-; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
-; CHECK-NEXT:    store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
+; CHECK-NEXT:    store i32 [[X14]], ptr [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256
@@ -56,12 +56,12 @@ for.body:
   %p3.addr.0258 = phi double [ %add, %bb1 ], [ %add28, %for.body ]
   %vecinit.i.i237 = insertelement <2 x double> undef, double %t.0259, i32 0
   %x13 = tail call i32 @_xfn(<2 x double> %vecinit.i.i237) #2
-  %arrayidx = getelementptr inbounds [256 x i32], [256 x i32]* %tab1, i64 0, i64 %indvars.iv266
-  store i32 %x13, i32* %arrayidx, align 4, !tbaa !4
+  %arrayidx = getelementptr inbounds [256 x i32], ptr %tab1, i64 0, i64 %indvars.iv266
+  store i32 %x13, ptr %arrayidx, align 4, !tbaa !4
   %vecinit.i.i = insertelement <2 x double> undef, double %p3.addr.0258, i32 0
   %x14 = tail call i32 @_xfn(<2 x double> %vecinit.i.i) #2
-  %arrayidx26 = getelementptr inbounds [256 x i32], [256 x i32]* %tab2, i64 0, i64 %indvars.iv266
-  store i32 %x14, i32* %arrayidx26, align 4, !tbaa !4
+  %arrayidx26 = getelementptr inbounds [256 x i32], ptr %tab2, i64 0, i64 %indvars.iv266
+  store i32 %x14, ptr %arrayidx26, align 4, !tbaa !4
   %add27 = fadd double %mul19, %t.0259
   %add28 = fadd double %mul21, %p3.addr.0258
   %indvars.iv.next267 = add nuw nsw i64 %indvars.iv266, 1

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll
index 27a570dbc2893..1c0fa1d85f9e4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll
@@ -29,7 +29,6 @@ define void @SIM4() {
 ; CHECK:       if.end98:
 ; CHECK-NEXT:    br i1 undef, label [[LAND_LHS_TRUE167]], label [[IF_THEN103:%.*]]
 ; CHECK:       if.then103:
-; CHECK-NEXT:    [[FROM1115:%.*]] = getelementptr inbounds [[STRUCT__EXON_T_12_103_220_363_480_649_740_857_1039_1065_1078_1091_1117_1130_1156_1169_1195_1221_1234_1286_1299_1312_1338_1429_1455_1468_1494_1520_1884_1897_1975_2066_2105_2170_2171:%.*]], %struct._exon_t.12.103.220.363.480.649.740.857.1039.1065.1078.1091.1117.1130.1156.1169.1195.1221.1234.1286.1299.1312.1338.1429.1455.1468.1494.1520.1884.1897.1975.2066.2105.2170.2171* undef, i64 0, i32 0
 ; CHECK-NEXT:    [[DOTSUB100:%.*]] = select i1 undef, i32 250, i32 undef
 ; CHECK-NEXT:    [[MUL114:%.*]] = shl nsw i32 [[DOTSUB100]], 2
 ; CHECK-NEXT:    [[COND125:%.*]] = select i1 undef, i32 undef, i32 [[MUL114]]
@@ -69,8 +68,7 @@ define void @SIM4() {
 ; CHECK-NEXT:    br i1 false, label [[IF_THEN157:%.*]], label [[LAND_LHS_TRUE167]]
 ; CHECK:       if.then157:
 ; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i32> [[TMP4]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[FROM1115]] to <2 x i32>*
-; CHECK-NEXT:    store <2 x i32> [[TMP5]], <2 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    store <2 x i32> [[TMP5]], ptr undef, align 4
 ; CHECK-NEXT:    br label [[LAND_LHS_TRUE167]]
 ; CHECK:       land.lhs.true167:
 ; CHECK-NEXT:    unreachable
@@ -109,13 +107,12 @@ land.rhs.lr.ph:                                   ; preds = %if.then17
   unreachable
 
 if.end98:                                         ; preds = %if.then17
-  %from299 = getelementptr inbounds %struct._exon_t.12.103.220.363.480.649.740.857.1039.1065.1078.1091.1117.1130.1156.1169.1195.1221.1234.1286.1299.1312.1338.1429.1455.1468.1494.1520.1884.1897.1975.2066.2105.2170.2171, %struct._exon_t.12.103.220.363.480.649.740.857.1039.1065.1078.1091.1117.1130.1156.1169.1195.1221.1234.1286.1299.1312.1338.1429.1455.1468.1494.1520.1884.1897.1975.2066.2105.2170.2171* undef, i64 0, i32 1
+  %from299 = getelementptr inbounds %struct._exon_t.12.103.220.363.480.649.740.857.1039.1065.1078.1091.1117.1130.1156.1169.1195.1221.1234.1286.1299.1312.1338.1429.1455.1468.1494.1520.1884.1897.1975.2066.2105.2170.2171, ptr undef, i64 0, i32 1
   br i1 undef, label %land.lhs.true167, label %if.then103
 
 if.then103:                                       ; preds = %if.end98
   %.sub100 = select i1 undef, i32 250, i32 undef
   %mul114 = shl nsw i32 %.sub100, 2
-  %from1115 = getelementptr inbounds %struct._exon_t.12.103.220.363.480.649.740.857.1039.1065.1078.1091.1117.1130.1156.1169.1195.1221.1234.1286.1299.1312.1338.1429.1455.1468.1494.1520.1884.1897.1975.2066.2105.2170.2171, %struct._exon_t.12.103.220.363.480.649.740.857.1039.1065.1078.1091.1117.1130.1156.1169.1195.1221.1234.1286.1299.1312.1338.1429.1455.1468.1494.1520.1884.1897.1975.2066.2105.2170.2171* undef, i64 0, i32 0
   %cond125 = select i1 undef, i32 undef, i32 %mul114
   br label %for.cond.i
 
@@ -168,9 +165,9 @@ extend_bw.exit:                                   ; preds = %while.end275.i, %if
 
 if.then157:                                       ; preds = %extend_bw.exit
   %add158 = add nsw i32 %add14.i1262, 1
-  store i32 %add158, i32* %from299, align 4
+  store i32 %add158, ptr %from299, align 4
   %add160 = add nsw i32 %add15.i1261, 1
-  store i32 %add160, i32* %from1115, align 4
+  store i32 %add160, ptr undef, align 4
   br label %land.lhs.true167
 
 land.lhs.true167:                                 ; preds = %if.then157, %extend_bw.exit, %if.end98, %land.lhs.true, %if.end12

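The dropped %from1115 above deserves a note: its getelementptr used only zero indices, and a GEP whose indices are all zero computes exactly its base address. Once pointee types are gone the instruction carries no information, so the regenerated test stores straight through ptr undef and the CHECK line for the GEP disappears. A minimal sketch of that equivalence, with a hypothetical function and struct type that are not from this commit:

  define void @zero_gep(ptr %s) {
    ; all indices are zero, so %f0 is the same address as %s;
    ; under opaque pointers this GEP is pure bookkeeping
    %f0 = getelementptr inbounds { i32, i32 }, ptr %s, i64 0, i32 0
    store i32 7, ptr %f0, align 4  ; identical to storing through %s
    ret void
  }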
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll b/llvm/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll
index f47cf2ef88b38..d4d62706cda57 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-; int foo(double *A, float *B, int g) {
+; int foo(ptr A, ptr B, int g) {
 ;   float B0 = B[0];
 ;   float B1 = B[1]; <----- BasicBlock #1
 ;   B0 += 5;
@@ -17,11 +17,10 @@ target triple = "x86_64-apple-macosx10.8.0"
 ; }
 
 
-define i32 @foo(double* nocapture %A, float* nocapture %B, i32 %g) {
+define i32 @foo(ptr nocapture %A, ptr nocapture %B, i32 %g) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[B:%.*]] to <2 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x float> [[TMP1]], <float 5.000000e+00, float 8.000000e+00>
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[G:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
@@ -30,17 +29,15 @@ define i32 @foo(double* nocapture %A, float* nocapture %B, i32 %g) {
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
 ; CHECK-NEXT:    [[TMP3:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast double* [[A]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP6]], ptr [[A]], align 8
 ; CHECK-NEXT:    ret i32 undef
 ;
 entry:
-  %0 = load float, float* %B, align 4
-  %arrayidx1 = getelementptr inbounds float, float* %B, i64 1
-  %1 = load float, float* %arrayidx1, align 4
+  %0 = load float, ptr %B, align 4
+  %arrayidx1 = getelementptr inbounds float, ptr %B, i64 1
+  %1 = load float, ptr %arrayidx1, align 4
   %add = fadd float %0, 5.000000e+00
   %add2 = fadd float %1, 8.000000e+00
   %tobool = icmp eq i32 %g, 0
@@ -52,14 +49,14 @@ if.then:
 
 if.end:
   %conv = fpext float %add to double
-  %2 = load double, double* %A, align 8
+  %2 = load double, ptr %A, align 8
   %add4 = fadd double %conv, %2
-  store double %add4, double* %A, align 8
+  store double %add4, ptr %A, align 8
   %conv5 = fpext float %add2 to double
-  %arrayidx6 = getelementptr inbounds double, double* %A, i64 1
-  %3 = load double, double* %arrayidx6, align 8
+  %arrayidx6 = getelementptr inbounds double, ptr %A, i64 1
+  %3 = load double, ptr %arrayidx6, align 8
   %add7 = fadd double %conv5, %3
-  store double %add7, double* %arrayidx6, align 8
+  store double %add7, ptr %arrayidx6, align 8
   ret i32 undef
 }
 

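Two side effects of the scripted conversion are visible in this file. First, the vectorizer's output no longer needs bitcast glue: a <2 x float> load takes the incoming ptr directly, so each bitcast/load (or bitcast/store) CHECK pair collapses into one line. Second, the script also rewrote the C comment at the top of the test, turning double *A, float *B into ptr A, ptr B; that is not valid C, just a harmless artifact of the mechanical update. A minimal sketch of the first point, with a hypothetical @vec_load that is not part of the commit:

  define <2 x float> @vec_load(ptr %B) {
    ; no <2 x float>* bitcast is required: the loaded type is spelled
    ; on the load itself and the address operand is an untyped ptr
    %v = load <2 x float>, ptr %B, align 4
    ret <2 x float> %v
  }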
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cse_extractelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/cse_extractelement.ll
index ae00ec134c803..bcee81f901987 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cse_extractelement.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cse_extractelement.ll
@@ -1,51 +1,49 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes=slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
 
-define void @test(i32* %ptr, i32* noalias %s)  {
+define void @test(ptr %ptr, ptr noalias %s)  {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32* [[PTR:%.*]], null
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[PTR:%.*]], null
 ; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP:%.*]], label [[BAIL_OUT:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[S:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[S:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
 ; CHECK-NEXT:    br label [[LOOP1:%.*]]
 ; CHECK:       loop1:
-; CHECK-NEXT:    store i32 [[TMP3]], i32* [[S]], align 4
+; CHECK-NEXT:    store i32 [[TMP3]], ptr [[S]], align 4
 ; CHECK-NEXT:    br i1 true, label [[LOOP1]], label [[CONT:%.*]]
 ; CHECK:       cont:
 ; CHECK-NEXT:    br i1 true, label [[LOOP]], label [[BAIL_OUT]]
 ; CHECK:       bail_out:
 ; CHECK-NEXT:    [[DUMMY_PHI:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[TMP3]], [[CONT]] ]
-; CHECK-NEXT:    store i32 [[DUMMY_PHI]], i32* [[S]], align 4
+; CHECK-NEXT:    store i32 [[DUMMY_PHI]], ptr [[S]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %cmp = icmp eq i32* %ptr, null
+  %cmp = icmp eq ptr %ptr, null
   br i1 %cmp, label %loop, label %bail_out
 
 loop:
-  %0 = load i32, i32 * %ptr , align 4
-  %1 = getelementptr inbounds i32, i32 * %ptr, i64 1
-  %2 = load i32, i32 * %1 , align 4
-  %3 = getelementptr inbounds i32, i32 *%ptr, i64 2
-  %4 = load i32, i32 * %3 , align 4
-  %5 = getelementptr inbounds i32, i32 *%ptr, i64 3
-  %6 = load i32, i32 * %5 , align 4
-  store i32 %0, i32* %s, align 4
-  %7 = getelementptr inbounds i32, i32 * %s, i64 1
-  store i32 %2, i32* %7, align 4
-  %8 = getelementptr inbounds i32, i32 * %s, i64 2
-  store i32 %4, i32* %8, align 4
-  %9 = getelementptr inbounds i32, i32 * %s, i64 3
-  store i32 %6, i32* %9, align 4
+  %0 = load i32, ptr %ptr , align 4
+  %1 = getelementptr inbounds i32, ptr %ptr, i64 1
+  %2 = load i32, ptr %1 , align 4
+  %3 = getelementptr inbounds i32, ptr %ptr, i64 2
+  %4 = load i32, ptr %3 , align 4
+  %5 = getelementptr inbounds i32, ptr %ptr, i64 3
+  %6 = load i32, ptr %5 , align 4
+  store i32 %0, ptr %s, align 4
+  %7 = getelementptr inbounds i32, ptr %s, i64 1
+  store i32 %2, ptr %7, align 4
+  %8 = getelementptr inbounds i32, ptr %s, i64 2
+  store i32 %4, ptr %8, align 4
+  %9 = getelementptr inbounds i32, ptr %s, i64 3
+  store i32 %6, ptr %9, align 4
   br label %loop1
 
 loop1:
-  store i32 %0, i32* %s, align 4
+  store i32 %0, ptr %s, align 4
   br i1 true, label %loop1, label %cont
 
 cont:
@@ -53,7 +51,7 @@ cont:
 
 bail_out:
   %dummy_phi = phi i32 [ 1, %entry ], [ %0, %cont ]
-  store i32 %dummy_phi, i32* %s, align 4
+  store i32 %dummy_phi, ptr %s, align 4
   ret void
 }
 

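The ctlz file below shows how constant-expression addresses fold under the same rules. A constant GEP whose indices are all zero resolves to the global itself, so i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0) becomes plain ptr @src64, while nonzero indices keep an explicit constant GEP, and the bitcast-to-vector-pointer wrappers around vector loads and stores vanish entirely. A minimal sketch with a hypothetical global @arr, not from this commit:

  @arr = global [4 x i64] zeroinitializer

  define i64 @first_two() {
    ; the zero-index constant GEP folds away: the address is just @arr
    %a = load i64, ptr @arr, align 8
    ; a nonzero index keeps an explicit constant-expression GEP
    %b = load i64, ptr getelementptr inbounds ([4 x i64], ptr @arr, i64 0, i64 1), align 8
    %s = add i64 %a, %b
    ret i64 %s
  }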
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/ctlz.ll b/llvm/test/Transforms/SLPVectorizer/X86/ctlz.ll
index 2974ca8fb6d34..0462f125955bf 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/ctlz.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/ctlz.ll
@@ -27,160 +27,160 @@ declare  i8 @llvm.ctlz.i8(i8, i1)
 
 define void @ctlz_2i64() #0 {
 ; SSE-LABEL: @ctlz_2i64(
-; SSE-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-; SSE-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 8
+; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
 ; SSE-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
 ; SSE-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
-; SSE-NEXT:    store i64 [[CTLZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-; SSE-NEXT:    store i64 [[CTLZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; SSE-NEXT:    store i64 [[CTLZ0]], ptr @dst64, align 8
+; SSE-NEXT:    store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX1-LABEL: @ctlz_2i64(
-; AVX1-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-; AVX1-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; AVX1-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 8
+; AVX1-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
 ; AVX1-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
 ; AVX1-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
-; AVX1-NEXT:    store i64 [[CTLZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-; AVX1-NEXT:    store i64 [[CTLZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; AVX1-NEXT:    store i64 [[CTLZ0]], ptr @dst64, align 8
+; AVX1-NEXT:    store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @ctlz_2i64(
-; AVX2-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-; AVX2-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; AVX2-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 8
+; AVX2-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
 ; AVX2-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
 ; AVX2-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
-; AVX2-NEXT:    store i64 [[CTLZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-; AVX2-NEXT:    store i64 [[CTLZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; AVX2-NEXT:    store i64 [[CTLZ0]], ptr @dst64, align 8
+; AVX2-NEXT:    store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @ctlz_2i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 8
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[TMP1]], i1 false)
-; AVX512-NEXT:    store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 8
+; AVX512-NEXT:    store <2 x i64> [[TMP2]], ptr @dst64, align 8
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-  %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+  %ld0 = load i64, ptr @src64, align 8
+  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
   %ctlz0 = call i64 @llvm.ctlz.i64(i64 %ld0, i1 0)
   %ctlz1 = call i64 @llvm.ctlz.i64(i64 %ld1, i1 0)
-  store i64 %ctlz0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-  store i64 %ctlz1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+  store i64 %ctlz0, ptr @dst64, align 8
+  store i64 %ctlz1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @ctlz_4i64() #0 {
 ; SSE-LABEL: @ctlz_4i64(
-; SSE-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
-; SSE-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
-; SSE-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
-; SSE-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 4
+; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
+; SSE-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
+; SSE-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
 ; SSE-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
 ; SSE-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
 ; SSE-NEXT:    [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 false)
 ; SSE-NEXT:    [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 false)
-; SSE-NEXT:    store i64 [[CTLZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
-; SSE-NEXT:    store i64 [[CTLZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
-; SSE-NEXT:    store i64 [[CTLZ2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
-; SSE-NEXT:    store i64 [[CTLZ3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+; SSE-NEXT:    store i64 [[CTLZ0]], ptr @dst64, align 4
+; SSE-NEXT:    store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
+; SSE-NEXT:    store i64 [[CTLZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
+; SSE-NEXT:    store i64 [[CTLZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX1-LABEL: @ctlz_4i64(
-; AVX1-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
-; AVX1-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
-; AVX1-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
-; AVX1-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+; AVX1-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 4
+; AVX1-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
+; AVX1-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
+; AVX1-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
 ; AVX1-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
 ; AVX1-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
 ; AVX1-NEXT:    [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 false)
 ; AVX1-NEXT:    [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 false)
-; AVX1-NEXT:    store i64 [[CTLZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
-; AVX1-NEXT:    store i64 [[CTLZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
-; AVX1-NEXT:    store i64 [[CTLZ2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
-; AVX1-NEXT:    store i64 [[CTLZ3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+; AVX1-NEXT:    store i64 [[CTLZ0]], ptr @dst64, align 4
+; AVX1-NEXT:    store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
+; AVX1-NEXT:    store i64 [[CTLZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
+; AVX1-NEXT:    store i64 [[CTLZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @ctlz_4i64(
-; AVX2-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
-; AVX2-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
-; AVX2-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
-; AVX2-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+; AVX2-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 4
+; AVX2-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
+; AVX2-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
+; AVX2-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
 ; AVX2-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
 ; AVX2-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
 ; AVX2-NEXT:    [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 false)
 ; AVX2-NEXT:    [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 false)
-; AVX2-NEXT:    store i64 [[CTLZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
-; AVX2-NEXT:    store i64 [[CTLZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
-; AVX2-NEXT:    store i64 [[CTLZ2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
-; AVX2-NEXT:    store i64 [[CTLZ3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+; AVX2-NEXT:    store i64 [[CTLZ0]], ptr @dst64, align 4
+; AVX2-NEXT:    store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
+; AVX2-NEXT:    store i64 [[CTLZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
+; AVX2-NEXT:    store i64 [[CTLZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @ctlz_4i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([4 x i64]* @src64 to <4 x i64>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 4
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> [[TMP1]], i1 false)
-; AVX512-NEXT:    store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([4 x i64]* @dst64 to <4 x i64>*), align 4
+; AVX512-NEXT:    store <4 x i64> [[TMP2]], ptr @dst64, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
-  %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
-  %ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
-  %ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+  %ld0 = load i64, ptr @src64, align 4
+  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
+  %ld2 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
+  %ld3 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
   %ctlz0 = call i64 @llvm.ctlz.i64(i64 %ld0, i1 0)
   %ctlz1 = call i64 @llvm.ctlz.i64(i64 %ld1, i1 0)
   %ctlz2 = call i64 @llvm.ctlz.i64(i64 %ld2, i1 0)
   %ctlz3 = call i64 @llvm.ctlz.i64(i64 %ld3, i1 0)
-  store i64 %ctlz0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
-  store i64 %ctlz1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
-  store i64 %ctlz2, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
-  store i64 %ctlz3, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+  store i64 %ctlz0, ptr @dst64, align 4
+  store i64 %ctlz1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
+  store i64 %ctlz2, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
+  store i64 %ctlz3, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
   ret void
 }
 
 define void @ctlz_4i32() #0 {
 ; CHECK-LABEL: @ctlz_4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP1]], i1 false)
-; CHECK-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 4
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
-  %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
-  %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
-  %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+  %ld0 = load i32, ptr @src32, align 4
+  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
+  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
   %ctlz0 = call i32 @llvm.ctlz.i32(i32 %ld0, i1 0)
   %ctlz1 = call i32 @llvm.ctlz.i32(i32 %ld1, i1 0)
   %ctlz2 = call i32 @llvm.ctlz.i32(i32 %ld2, i1 0)
   %ctlz3 = call i32 @llvm.ctlz.i32(i32 %ld3, i1 0)
-  store i32 %ctlz0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
-  store i32 %ctlz1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
-  store i32 %ctlz2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
-  store i32 %ctlz3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+  store i32 %ctlz0, ptr @dst32, align 4
+  store i32 %ctlz1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
+  store i32 %ctlz2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
+  store i32 %ctlz3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @ctlz_8i32() #0 {
 ; SSE-LABEL: @ctlz_8i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 2
 ; SSE-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP1]], i1 false)
-; SSE-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 2
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 2
+; SSE-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 2
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
 ; SSE-NEXT:    [[TMP4:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP3]], i1 false)
-; SSE-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 2
+; SSE-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @ctlz_8i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
 ; AVX-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> [[TMP1]], i1 false)
-; AVX-NEXT:    store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
+; AVX-NEXT:    store <8 x i32> [[TMP2]], ptr @dst32, align 2
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
-  %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
-  %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
-  %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
-  %ld4 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
-  %ld5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
-  %ld6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
-  %ld7 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+  %ld0 = load i32, ptr @src32, align 2
+  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
+  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
+  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
+  %ld4 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
+  %ld5 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
+  %ld6 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
+  %ld7 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
   %ctlz0 = call i32 @llvm.ctlz.i32(i32 %ld0, i1 0)
   %ctlz1 = call i32 @llvm.ctlz.i32(i32 %ld1, i1 0)
   %ctlz2 = call i32 @llvm.ctlz.i32(i32 %ld2, i1 0)
@@ -189,32 +189,32 @@ define void @ctlz_8i32() #0 {
   %ctlz5 = call i32 @llvm.ctlz.i32(i32 %ld5, i1 0)
   %ctlz6 = call i32 @llvm.ctlz.i32(i32 %ld6, i1 0)
   %ctlz7 = call i32 @llvm.ctlz.i32(i32 %ld7, i1 0)
-  store i32 %ctlz0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
-  store i32 %ctlz1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
-  store i32 %ctlz2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
-  store i32 %ctlz3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
-  store i32 %ctlz4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
-  store i32 %ctlz5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
-  store i32 %ctlz6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
-  store i32 %ctlz7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+  store i32 %ctlz0, ptr @dst32, align 2
+  store i32 %ctlz1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
+  store i32 %ctlz2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
+  store i32 %ctlz3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
+  store i32 %ctlz4, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
+  store i32 %ctlz5, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
+  store i32 %ctlz6, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
+  store i32 %ctlz7, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
   ret void
 }
 
 define void @ctlz_8i16() #0 {
 ; CHECK-LABEL: @ctlz_8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[TMP1]], i1 false)
-; CHECK-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
-  %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
-  %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
-  %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
-  %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
-  %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
-  %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
-  %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+  %ld0 = load i16, ptr @src16, align 2
+  %ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
+  %ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
+  %ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
+  %ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
+  %ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
+  %ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
+  %ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
   %ctlz0 = call i16 @llvm.ctlz.i16(i16 %ld0, i1 0)
   %ctlz1 = call i16 @llvm.ctlz.i16(i16 %ld1, i1 0)
   %ctlz2 = call i16 @llvm.ctlz.i16(i16 %ld2, i1 0)
@@ -223,49 +223,49 @@ define void @ctlz_8i16() #0 {
   %ctlz5 = call i16 @llvm.ctlz.i16(i16 %ld5, i1 0)
   %ctlz6 = call i16 @llvm.ctlz.i16(i16 %ld6, i1 0)
   %ctlz7 = call i16 @llvm.ctlz.i16(i16 %ld7, i1 0)
-  store i16 %ctlz0, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
-  store i16 %ctlz1, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
-  store i16 %ctlz2, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
-  store i16 %ctlz3, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
-  store i16 %ctlz4, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
-  store i16 %ctlz5, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
-  store i16 %ctlz6, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
-  store i16 %ctlz7, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+  store i16 %ctlz0, ptr @dst16, align 2
+  store i16 %ctlz1, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
+  store i16 %ctlz2, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
+  store i16 %ctlz3, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
+  store i16 %ctlz4, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
+  store i16 %ctlz5, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
+  store i16 %ctlz6, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
+  store i16 %ctlz7, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
   ret void
 }
 
 define void @ctlz_16i16() #0 {
 ; SSE-LABEL: @ctlz_16i16(
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
 ; SSE-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[TMP1]], i1 false)
-; SSE-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
+; SSE-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
 ; SSE-NEXT:    [[TMP4:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[TMP3]], i1 false)
-; SSE-NEXT:    store <8 x i16> [[TMP4]], <8 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP4]], ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @ctlz_16i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([16 x i16]* @src16 to <16 x i16>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 2
 ; AVX-NEXT:    [[TMP2:%.*]] = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> [[TMP1]], i1 false)
-; AVX-NEXT:    store <16 x i16> [[TMP2]], <16 x i16>* bitcast ([16 x i16]* @dst16 to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP2]], ptr @dst16, align 2
 ; AVX-NEXT:    ret void
 ;
-  %ld0  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  0), align 2
-  %ld1  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  1), align 2
-  %ld2  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  2), align 2
-  %ld3  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  3), align 2
-  %ld4  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  4), align 2
-  %ld5  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  5), align 2
-  %ld6  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  6), align 2
-  %ld7  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  7), align 2
-  %ld8  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  8), align 2
-  %ld9  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  9), align 2
-  %ld10 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
-  %ld11 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
-  %ld12 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
-  %ld13 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
-  %ld14 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
-  %ld15 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
+  %ld0  = load i16, ptr @src16, align 2
+  %ld1  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  1), align 2
+  %ld2  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  2), align 2
+  %ld3  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  3), align 2
+  %ld4  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  4), align 2
+  %ld5  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  5), align 2
+  %ld6  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  6), align 2
+  %ld7  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  7), align 2
+  %ld8  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  8), align 2
+  %ld9  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  9), align 2
+  %ld10 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 10), align 2
+  %ld11 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 11), align 2
+  %ld12 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 12), align 2
+  %ld13 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 13), align 2
+  %ld14 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 14), align 2
+  %ld15 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 15), align 2
   %ctlz0  = call i16 @llvm.ctlz.i16(i16 %ld0, i1 0)
   %ctlz1  = call i16 @llvm.ctlz.i16(i16 %ld1, i1 0)
   %ctlz2  = call i16 @llvm.ctlz.i16(i16 %ld2, i1 0)
@@ -282,48 +282,48 @@ define void @ctlz_16i16() #0 {
   %ctlz13 = call i16 @llvm.ctlz.i16(i16 %ld13, i1 0)
   %ctlz14 = call i16 @llvm.ctlz.i16(i16 %ld14, i1 0)
   %ctlz15 = call i16 @llvm.ctlz.i16(i16 %ld15, i1 0)
-  store i16 %ctlz0 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  0), align 2
-  store i16 %ctlz1 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  1), align 2
-  store i16 %ctlz2 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  2), align 2
-  store i16 %ctlz3 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  3), align 2
-  store i16 %ctlz4 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  4), align 2
-  store i16 %ctlz5 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  5), align 2
-  store i16 %ctlz6 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  6), align 2
-  store i16 %ctlz7 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  7), align 2
-  store i16 %ctlz8 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  8), align 2
-  store i16 %ctlz9 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  9), align 2
-  store i16 %ctlz10, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
-  store i16 %ctlz11, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
-  store i16 %ctlz12, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
-  store i16 %ctlz13, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
-  store i16 %ctlz14, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
-  store i16 %ctlz15, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
+  store i16 %ctlz0 , ptr @dst16, align 2
+  store i16 %ctlz1 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  1), align 2
+  store i16 %ctlz2 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  2), align 2
+  store i16 %ctlz3 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  3), align 2
+  store i16 %ctlz4 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  4), align 2
+  store i16 %ctlz5 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  5), align 2
+  store i16 %ctlz6 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  6), align 2
+  store i16 %ctlz7 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  7), align 2
+  store i16 %ctlz8 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  8), align 2
+  store i16 %ctlz9 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  9), align 2
+  store i16 %ctlz10, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 10), align 2
+  store i16 %ctlz11, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 11), align 2
+  store i16 %ctlz12, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 12), align 2
+  store i16 %ctlz13, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 13), align 2
+  store i16 %ctlz14, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 14), align 2
+  store i16 %ctlz15, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 15), align 2
   ret void
 }
 
 define void @ctlz_16i8() #0 {
 ; CHECK-LABEL: @ctlz_16i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([32 x i8]* @src8 to <16 x i8>*), align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[TMP1]], i1 false)
-; CHECK-NEXT:    store <16 x i8> [[TMP2]], <16 x i8>* bitcast ([32 x i8]* @dst8 to <16 x i8>*), align 1
+; CHECK-NEXT:    store <16 x i8> [[TMP2]], ptr @dst8, align 1
 ; CHECK-NEXT:    ret void
 ;
-  %ld0  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  0), align 1
-  %ld1  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  1), align 1
-  %ld2  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  2), align 1
-  %ld3  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  3), align 1
-  %ld4  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  4), align 1
-  %ld5  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  5), align 1
-  %ld6  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  6), align 1
-  %ld7  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  7), align 1
-  %ld8  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  8), align 1
-  %ld9  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  9), align 1
-  %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
-  %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
-  %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
-  %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
-  %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
-  %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+  %ld0  = load i8, ptr @src8, align 1
+  %ld1  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  1), align 1
+  %ld2  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  2), align 1
+  %ld3  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  3), align 1
+  %ld4  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  4), align 1
+  %ld5  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  5), align 1
+  %ld6  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  6), align 1
+  %ld7  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  7), align 1
+  %ld8  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  8), align 1
+  %ld9  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  9), align 1
+  %ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
+  %ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
+  %ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
+  %ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
+  %ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
+  %ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
   %ctlz0  = call i8 @llvm.ctlz.i8(i8 %ld0, i1 0)
   %ctlz1  = call i8 @llvm.ctlz.i8(i8 %ld1, i1 0)
   %ctlz2  = call i8 @llvm.ctlz.i8(i8 %ld2, i1 0)
@@ -340,73 +340,73 @@ define void @ctlz_16i8() #0 {
   %ctlz13 = call i8 @llvm.ctlz.i8(i8 %ld13, i1 0)
   %ctlz14 = call i8 @llvm.ctlz.i8(i8 %ld14, i1 0)
   %ctlz15 = call i8 @llvm.ctlz.i8(i8 %ld15, i1 0)
-  store i8 %ctlz0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  0), align 1
-  store i8 %ctlz1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  1), align 1
-  store i8 %ctlz2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  2), align 1
-  store i8 %ctlz3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  3), align 1
-  store i8 %ctlz4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  4), align 1
-  store i8 %ctlz5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  5), align 1
-  store i8 %ctlz6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  6), align 1
-  store i8 %ctlz7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  7), align 1
-  store i8 %ctlz8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  8), align 1
-  store i8 %ctlz9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  9), align 1
-  store i8 %ctlz10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
-  store i8 %ctlz11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
-  store i8 %ctlz12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
-  store i8 %ctlz13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
-  store i8 %ctlz14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
-  store i8 %ctlz15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+  store i8 %ctlz0 , ptr @dst8, align 1
+  store i8 %ctlz1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  1), align 1
+  store i8 %ctlz2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  2), align 1
+  store i8 %ctlz3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  3), align 1
+  store i8 %ctlz4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  4), align 1
+  store i8 %ctlz5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  5), align 1
+  store i8 %ctlz6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  6), align 1
+  store i8 %ctlz7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  7), align 1
+  store i8 %ctlz8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  8), align 1
+  store i8 %ctlz9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  9), align 1
+  store i8 %ctlz10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
+  store i8 %ctlz11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
+  store i8 %ctlz12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
+  store i8 %ctlz13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
+  store i8 %ctlz14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
+  store i8 %ctlz15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
   ret void
 }
 
 define void @ctlz_32i8() #0 {
 ; SSE-LABEL: @ctlz_32i8(
-; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([32 x i8]* @src8 to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
 ; SSE-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[TMP1]], i1 false)
-; SSE-NEXT:    store <16 x i8> [[TMP2]], <16 x i8>* bitcast ([32 x i8]* @dst8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP2]], ptr @dst8, align 1
+; SSE-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
 ; SSE-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[TMP3]], i1 false)
-; SSE-NEXT:    store <16 x i8> [[TMP4]], <16 x i8>* bitcast (i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP4]], ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @ctlz_32i8(
-; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([32 x i8]* @src8 to <32 x i8>*), align 1
+; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @src8, align 1
 ; AVX-NEXT:    [[TMP2:%.*]] = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> [[TMP1]], i1 false)
-; AVX-NEXT:    store <32 x i8> [[TMP2]], <32 x i8>* bitcast ([32 x i8]* @dst8 to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP2]], ptr @dst8, align 1
 ; AVX-NEXT:    ret void
 ;
-  %ld0  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  0), align 1
-  %ld1  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  1), align 1
-  %ld2  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  2), align 1
-  %ld3  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  3), align 1
-  %ld4  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  4), align 1
-  %ld5  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  5), align 1
-  %ld6  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  6), align 1
-  %ld7  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  7), align 1
-  %ld8  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  8), align 1
-  %ld9  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  9), align 1
-  %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
-  %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
-  %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
-  %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
-  %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
-  %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
-  %ld16 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
-  %ld17 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
-  %ld18 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
-  %ld19 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
-  %ld20 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
-  %ld21 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
-  %ld22 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
-  %ld23 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
-  %ld24 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
-  %ld25 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
-  %ld26 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
-  %ld27 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
-  %ld28 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
-  %ld29 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
-  %ld30 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
-  %ld31 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
+  %ld0  = load i8, ptr @src8, align 1
+  %ld1  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  1), align 1
+  %ld2  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  2), align 1
+  %ld3  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  3), align 1
+  %ld4  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  4), align 1
+  %ld5  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  5), align 1
+  %ld6  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  6), align 1
+  %ld7  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  7), align 1
+  %ld8  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  8), align 1
+  %ld9  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  9), align 1
+  %ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
+  %ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
+  %ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
+  %ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
+  %ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
+  %ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
+  %ld16 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
+  %ld17 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 17), align 1
+  %ld18 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 18), align 1
+  %ld19 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 19), align 1
+  %ld20 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 20), align 1
+  %ld21 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 21), align 1
+  %ld22 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 22), align 1
+  %ld23 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 23), align 1
+  %ld24 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 24), align 1
+  %ld25 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 25), align 1
+  %ld26 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 26), align 1
+  %ld27 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 27), align 1
+  %ld28 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 28), align 1
+  %ld29 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 29), align 1
+  %ld30 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 30), align 1
+  %ld31 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 31), align 1
   %ctlz0  = call i8 @llvm.ctlz.i8(i8 %ld0, i1 0)
   %ctlz1  = call i8 @llvm.ctlz.i8(i8 %ld1, i1 0)
   %ctlz2  = call i8 @llvm.ctlz.i8(i8 %ld2, i1 0)
@@ -439,38 +439,38 @@ define void @ctlz_32i8() #0 {
   %ctlz29 = call i8 @llvm.ctlz.i8(i8 %ld29, i1 0)
   %ctlz30 = call i8 @llvm.ctlz.i8(i8 %ld30, i1 0)
   %ctlz31 = call i8 @llvm.ctlz.i8(i8 %ld31, i1 0)
-  store i8 %ctlz0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  0), align 1
-  store i8 %ctlz1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  1), align 1
-  store i8 %ctlz2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  2), align 1
-  store i8 %ctlz3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  3), align 1
-  store i8 %ctlz4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  4), align 1
-  store i8 %ctlz5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  5), align 1
-  store i8 %ctlz6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  6), align 1
-  store i8 %ctlz7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  7), align 1
-  store i8 %ctlz8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  8), align 1
-  store i8 %ctlz9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  9), align 1
-  store i8 %ctlz10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
-  store i8 %ctlz11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
-  store i8 %ctlz12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
-  store i8 %ctlz13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
-  store i8 %ctlz14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
-  store i8 %ctlz15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
-  store i8 %ctlz16, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
-  store i8 %ctlz17, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
-  store i8 %ctlz18, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
-  store i8 %ctlz19, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
-  store i8 %ctlz20, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
-  store i8 %ctlz21, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
-  store i8 %ctlz22, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
-  store i8 %ctlz23, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
-  store i8 %ctlz24, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
-  store i8 %ctlz25, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
-  store i8 %ctlz26, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
-  store i8 %ctlz27, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
-  store i8 %ctlz28, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
-  store i8 %ctlz29, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
-  store i8 %ctlz30, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
-  store i8 %ctlz31, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
+  store i8 %ctlz0 , ptr @dst8, align 1
+  store i8 %ctlz1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  1), align 1
+  store i8 %ctlz2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  2), align 1
+  store i8 %ctlz3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  3), align 1
+  store i8 %ctlz4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  4), align 1
+  store i8 %ctlz5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  5), align 1
+  store i8 %ctlz6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  6), align 1
+  store i8 %ctlz7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  7), align 1
+  store i8 %ctlz8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  8), align 1
+  store i8 %ctlz9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  9), align 1
+  store i8 %ctlz10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
+  store i8 %ctlz11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
+  store i8 %ctlz12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
+  store i8 %ctlz13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
+  store i8 %ctlz14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
+  store i8 %ctlz15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
+  store i8 %ctlz16, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
+  store i8 %ctlz17, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 17), align 1
+  store i8 %ctlz18, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 18), align 1
+  store i8 %ctlz19, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 19), align 1
+  store i8 %ctlz20, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 20), align 1
+  store i8 %ctlz21, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 21), align 1
+  store i8 %ctlz22, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 22), align 1
+  store i8 %ctlz23, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 23), align 1
+  store i8 %ctlz24, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 24), align 1
+  store i8 %ctlz25, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 25), align 1
+  store i8 %ctlz26, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 26), align 1
+  store i8 %ctlz27, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 27), align 1
+  store i8 %ctlz28, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 28), align 1
+  store i8 %ctlz29, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 29), align 1
+  store i8 %ctlz30, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 30), align 1
+  store i8 %ctlz31, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 31), align 1
   ret void
 }
 
@@ -480,182 +480,182 @@ define void @ctlz_32i8() #0 {
 
 define void @ctlz_undef_2i64() #0 {
 ; SSE-LABEL: @ctlz_undef_2i64(
-; SSE-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-; SSE-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 8
+; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
 ; SSE-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
 ; SSE-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
-; SSE-NEXT:    store i64 [[CTLZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-; SSE-NEXT:    store i64 [[CTLZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; SSE-NEXT:    store i64 [[CTLZ0]], ptr @dst64, align 8
+; SSE-NEXT:    store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX1-LABEL: @ctlz_undef_2i64(
-; AVX1-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-; AVX1-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; AVX1-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 8
+; AVX1-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
 ; AVX1-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
 ; AVX1-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
-; AVX1-NEXT:    store i64 [[CTLZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-; AVX1-NEXT:    store i64 [[CTLZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; AVX1-NEXT:    store i64 [[CTLZ0]], ptr @dst64, align 8
+; AVX1-NEXT:    store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @ctlz_undef_2i64(
-; AVX2-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-; AVX2-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; AVX2-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 8
+; AVX2-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
 ; AVX2-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
 ; AVX2-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
-; AVX2-NEXT:    store i64 [[CTLZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-; AVX2-NEXT:    store i64 [[CTLZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; AVX2-NEXT:    store i64 [[CTLZ0]], ptr @dst64, align 8
+; AVX2-NEXT:    store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @ctlz_undef_2i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 8
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[TMP1]], i1 true)
-; AVX512-NEXT:    store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 8
+; AVX512-NEXT:    store <2 x i64> [[TMP2]], ptr @dst64, align 8
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-  %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+  %ld0 = load i64, ptr @src64, align 8
+  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
   %ctlz0 = call i64 @llvm.ctlz.i64(i64 %ld0, i1 -1)
   %ctlz1 = call i64 @llvm.ctlz.i64(i64 %ld1, i1 -1)
-  store i64 %ctlz0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-  store i64 %ctlz1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+  store i64 %ctlz0, ptr @dst64, align 8
+  store i64 %ctlz1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @ctlz_undef_4i64() #0 {
 ; SSE-LABEL: @ctlz_undef_4i64(
-; SSE-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
-; SSE-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
-; SSE-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
-; SSE-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 4
+; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
+; SSE-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
+; SSE-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
 ; SSE-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
 ; SSE-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
 ; SSE-NEXT:    [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 true)
 ; SSE-NEXT:    [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 true)
-; SSE-NEXT:    store i64 [[CTLZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
-; SSE-NEXT:    store i64 [[CTLZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
-; SSE-NEXT:    store i64 [[CTLZ2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
-; SSE-NEXT:    store i64 [[CTLZ3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+; SSE-NEXT:    store i64 [[CTLZ0]], ptr @dst64, align 4
+; SSE-NEXT:    store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
+; SSE-NEXT:    store i64 [[CTLZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
+; SSE-NEXT:    store i64 [[CTLZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX1-LABEL: @ctlz_undef_4i64(
-; AVX1-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
-; AVX1-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
-; AVX1-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
-; AVX1-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+; AVX1-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 4
+; AVX1-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
+; AVX1-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
+; AVX1-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
 ; AVX1-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
 ; AVX1-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
 ; AVX1-NEXT:    [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 true)
 ; AVX1-NEXT:    [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 true)
-; AVX1-NEXT:    store i64 [[CTLZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
-; AVX1-NEXT:    store i64 [[CTLZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
-; AVX1-NEXT:    store i64 [[CTLZ2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
-; AVX1-NEXT:    store i64 [[CTLZ3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+; AVX1-NEXT:    store i64 [[CTLZ0]], ptr @dst64, align 4
+; AVX1-NEXT:    store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
+; AVX1-NEXT:    store i64 [[CTLZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
+; AVX1-NEXT:    store i64 [[CTLZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @ctlz_undef_4i64(
-; AVX2-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
-; AVX2-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
-; AVX2-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
-; AVX2-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+; AVX2-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 4
+; AVX2-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
+; AVX2-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
+; AVX2-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
 ; AVX2-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
 ; AVX2-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
 ; AVX2-NEXT:    [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 true)
 ; AVX2-NEXT:    [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 true)
-; AVX2-NEXT:    store i64 [[CTLZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
-; AVX2-NEXT:    store i64 [[CTLZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
-; AVX2-NEXT:    store i64 [[CTLZ2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
-; AVX2-NEXT:    store i64 [[CTLZ3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+; AVX2-NEXT:    store i64 [[CTLZ0]], ptr @dst64, align 4
+; AVX2-NEXT:    store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
+; AVX2-NEXT:    store i64 [[CTLZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
+; AVX2-NEXT:    store i64 [[CTLZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @ctlz_undef_4i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([4 x i64]* @src64 to <4 x i64>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 4
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> [[TMP1]], i1 true)
-; AVX512-NEXT:    store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([4 x i64]* @dst64 to <4 x i64>*), align 4
+; AVX512-NEXT:    store <4 x i64> [[TMP2]], ptr @dst64, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
-  %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
-  %ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
-  %ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+  %ld0 = load i64, ptr @src64, align 4
+  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
+  %ld2 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
+  %ld3 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
   %ctlz0 = call i64 @llvm.ctlz.i64(i64 %ld0, i1 -1)
   %ctlz1 = call i64 @llvm.ctlz.i64(i64 %ld1, i1 -1)
   %ctlz2 = call i64 @llvm.ctlz.i64(i64 %ld2, i1 -1)
   %ctlz3 = call i64 @llvm.ctlz.i64(i64 %ld3, i1 -1)
-  store i64 %ctlz0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
-  store i64 %ctlz1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
-  store i64 %ctlz2, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
-  store i64 %ctlz3, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+  store i64 %ctlz0, ptr @dst64, align 4
+  store i64 %ctlz1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
+  store i64 %ctlz2, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
+  store i64 %ctlz3, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
   ret void
 }
 
 define void @ctlz_undef_4i32() #0 {
 ; SSE-LABEL: @ctlz_undef_4i32(
-; SSE-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
-; SSE-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
-; SSE-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
-; SSE-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+; SSE-NEXT:    [[LD0:%.*]] = load i32, ptr @src32, align 4
+; SSE-NEXT:    [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
+; SSE-NEXT:    [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
+; SSE-NEXT:    [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
 ; SSE-NEXT:    [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 true)
 ; SSE-NEXT:    [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 true)
 ; SSE-NEXT:    [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 true)
 ; SSE-NEXT:    [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 true)
-; SSE-NEXT:    store i32 [[CTLZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
-; SSE-NEXT:    store i32 [[CTLZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
-; SSE-NEXT:    store i32 [[CTLZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
-; SSE-NEXT:    store i32 [[CTLZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+; SSE-NEXT:    store i32 [[CTLZ0]], ptr @dst32, align 4
+; SSE-NEXT:    store i32 [[CTLZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
+; SSE-NEXT:    store i32 [[CTLZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
+; SSE-NEXT:    store i32 [[CTLZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX1-LABEL: @ctlz_undef_4i32(
-; AVX1-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
-; AVX1-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
-; AVX1-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
-; AVX1-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+; AVX1-NEXT:    [[LD0:%.*]] = load i32, ptr @src32, align 4
+; AVX1-NEXT:    [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
+; AVX1-NEXT:    [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
+; AVX1-NEXT:    [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
 ; AVX1-NEXT:    [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 true)
 ; AVX1-NEXT:    [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 true)
 ; AVX1-NEXT:    [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 true)
 ; AVX1-NEXT:    [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 true)
-; AVX1-NEXT:    store i32 [[CTLZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
-; AVX1-NEXT:    store i32 [[CTLZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
-; AVX1-NEXT:    store i32 [[CTLZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
-; AVX1-NEXT:    store i32 [[CTLZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+; AVX1-NEXT:    store i32 [[CTLZ0]], ptr @dst32, align 4
+; AVX1-NEXT:    store i32 [[CTLZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
+; AVX1-NEXT:    store i32 [[CTLZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
+; AVX1-NEXT:    store i32 [[CTLZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @ctlz_undef_4i32(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
+; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
 ; AVX2-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP1]], i1 true)
-; AVX2-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
+; AVX2-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 4
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @ctlz_undef_4i32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP1]], i1 true)
-; AVX512-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
+; AVX512-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
-  %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
-  %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
-  %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+  %ld0 = load i32, ptr @src32, align 4
+  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
+  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
   %ctlz0 = call i32 @llvm.ctlz.i32(i32 %ld0, i1 -1)
   %ctlz1 = call i32 @llvm.ctlz.i32(i32 %ld1, i1 -1)
   %ctlz2 = call i32 @llvm.ctlz.i32(i32 %ld2, i1 -1)
   %ctlz3 = call i32 @llvm.ctlz.i32(i32 %ld3, i1 -1)
-  store i32 %ctlz0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
-  store i32 %ctlz1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
-  store i32 %ctlz2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
-  store i32 %ctlz3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+  store i32 %ctlz0, ptr @dst32, align 4
+  store i32 %ctlz1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
+  store i32 %ctlz2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
+  store i32 %ctlz3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @ctlz_undef_8i32() #0 {
 ; SSE-LABEL: @ctlz_undef_8i32(
-; SSE-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
-; SSE-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
-; SSE-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
-; SSE-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
-; SSE-NEXT:    [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
-; SSE-NEXT:    [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
-; SSE-NEXT:    [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
-; SSE-NEXT:    [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+; SSE-NEXT:    [[LD0:%.*]] = load i32, ptr @src32, align 2
+; SSE-NEXT:    [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
+; SSE-NEXT:    [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
+; SSE-NEXT:    [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
+; SSE-NEXT:    [[LD4:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
+; SSE-NEXT:    [[LD5:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
+; SSE-NEXT:    [[LD6:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
+; SSE-NEXT:    [[LD7:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
 ; SSE-NEXT:    [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 true)
 ; SSE-NEXT:    [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 true)
 ; SSE-NEXT:    [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 true)
@@ -664,25 +664,25 @@ define void @ctlz_undef_8i32() #0 {
 ; SSE-NEXT:    [[CTLZ5:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD5]], i1 true)
 ; SSE-NEXT:    [[CTLZ6:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD6]], i1 true)
 ; SSE-NEXT:    [[CTLZ7:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD7]], i1 true)
-; SSE-NEXT:    store i32 [[CTLZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
-; SSE-NEXT:    store i32 [[CTLZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
-; SSE-NEXT:    store i32 [[CTLZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
-; SSE-NEXT:    store i32 [[CTLZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
-; SSE-NEXT:    store i32 [[CTLZ4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
-; SSE-NEXT:    store i32 [[CTLZ5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
-; SSE-NEXT:    store i32 [[CTLZ6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
-; SSE-NEXT:    store i32 [[CTLZ7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+; SSE-NEXT:    store i32 [[CTLZ0]], ptr @dst32, align 2
+; SSE-NEXT:    store i32 [[CTLZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
+; SSE-NEXT:    store i32 [[CTLZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
+; SSE-NEXT:    store i32 [[CTLZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
+; SSE-NEXT:    store i32 [[CTLZ4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
+; SSE-NEXT:    store i32 [[CTLZ5]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
+; SSE-NEXT:    store i32 [[CTLZ6]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
+; SSE-NEXT:    store i32 [[CTLZ7]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
 ; SSE-NEXT:    ret void
 ;
 ; AVX1-LABEL: @ctlz_undef_8i32(
-; AVX1-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
-; AVX1-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
-; AVX1-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
-; AVX1-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
-; AVX1-NEXT:    [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
-; AVX1-NEXT:    [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
-; AVX1-NEXT:    [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
-; AVX1-NEXT:    [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+; AVX1-NEXT:    [[LD0:%.*]] = load i32, ptr @src32, align 2
+; AVX1-NEXT:    [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
+; AVX1-NEXT:    [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
+; AVX1-NEXT:    [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
+; AVX1-NEXT:    [[LD4:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
+; AVX1-NEXT:    [[LD5:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
+; AVX1-NEXT:    [[LD6:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
+; AVX1-NEXT:    [[LD7:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
 ; AVX1-NEXT:    [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 true)
 ; AVX1-NEXT:    [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 true)
 ; AVX1-NEXT:    [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 true)
@@ -691,36 +691,36 @@ define void @ctlz_undef_8i32() #0 {
 ; AVX1-NEXT:    [[CTLZ5:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD5]], i1 true)
 ; AVX1-NEXT:    [[CTLZ6:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD6]], i1 true)
 ; AVX1-NEXT:    [[CTLZ7:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD7]], i1 true)
-; AVX1-NEXT:    store i32 [[CTLZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
-; AVX1-NEXT:    store i32 [[CTLZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
-; AVX1-NEXT:    store i32 [[CTLZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
-; AVX1-NEXT:    store i32 [[CTLZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
-; AVX1-NEXT:    store i32 [[CTLZ4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
-; AVX1-NEXT:    store i32 [[CTLZ5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
-; AVX1-NEXT:    store i32 [[CTLZ6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
-; AVX1-NEXT:    store i32 [[CTLZ7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+; AVX1-NEXT:    store i32 [[CTLZ0]], ptr @dst32, align 2
+; AVX1-NEXT:    store i32 [[CTLZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
+; AVX1-NEXT:    store i32 [[CTLZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
+; AVX1-NEXT:    store i32 [[CTLZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
+; AVX1-NEXT:    store i32 [[CTLZ4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
+; AVX1-NEXT:    store i32 [[CTLZ5]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
+; AVX1-NEXT:    store i32 [[CTLZ6]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
+; AVX1-NEXT:    store i32 [[CTLZ7]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @ctlz_undef_8i32(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
+; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
 ; AVX2-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> [[TMP1]], i1 true)
-; AVX2-NEXT:    store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
+; AVX2-NEXT:    store <8 x i32> [[TMP2]], ptr @dst32, align 2
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @ctlz_undef_8i32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> [[TMP1]], i1 true)
-; AVX512-NEXT:    store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
+; AVX512-NEXT:    store <8 x i32> [[TMP2]], ptr @dst32, align 2
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
-  %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
-  %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
-  %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
-  %ld4 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
-  %ld5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
-  %ld6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
-  %ld7 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+  %ld0 = load i32, ptr @src32, align 2
+  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
+  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
+  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
+  %ld4 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
+  %ld5 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
+  %ld6 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
+  %ld7 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
   %ctlz0 = call i32 @llvm.ctlz.i32(i32 %ld0, i1 -1)
   %ctlz1 = call i32 @llvm.ctlz.i32(i32 %ld1, i1 -1)
   %ctlz2 = call i32 @llvm.ctlz.i32(i32 %ld2, i1 -1)
@@ -729,32 +729,32 @@ define void @ctlz_undef_8i32() #0 {
   %ctlz5 = call i32 @llvm.ctlz.i32(i32 %ld5, i1 -1)
   %ctlz6 = call i32 @llvm.ctlz.i32(i32 %ld6, i1 -1)
   %ctlz7 = call i32 @llvm.ctlz.i32(i32 %ld7, i1 -1)
-  store i32 %ctlz0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
-  store i32 %ctlz1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
-  store i32 %ctlz2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
-  store i32 %ctlz3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
-  store i32 %ctlz4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
-  store i32 %ctlz5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
-  store i32 %ctlz6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
-  store i32 %ctlz7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+  store i32 %ctlz0, ptr @dst32, align 2
+  store i32 %ctlz1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
+  store i32 %ctlz2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
+  store i32 %ctlz3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
+  store i32 %ctlz4, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
+  store i32 %ctlz5, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
+  store i32 %ctlz6, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
+  store i32 %ctlz7, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
   ret void
 }
 
 define void @ctlz_undef_8i16() #0 {
 ; CHECK-LABEL: @ctlz_undef_8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[TMP1]], i1 true)
-; CHECK-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
-  %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
-  %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
-  %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
-  %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
-  %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
-  %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
-  %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+  %ld0 = load i16, ptr @src16, align 2
+  %ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
+  %ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
+  %ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
+  %ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
+  %ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
+  %ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
+  %ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
   %ctlz0 = call i16 @llvm.ctlz.i16(i16 %ld0, i1 -1)
   %ctlz1 = call i16 @llvm.ctlz.i16(i16 %ld1, i1 -1)
   %ctlz2 = call i16 @llvm.ctlz.i16(i16 %ld2, i1 -1)
@@ -763,49 +763,49 @@ define void @ctlz_undef_8i16() #0 {
   %ctlz5 = call i16 @llvm.ctlz.i16(i16 %ld5, i1 -1)
   %ctlz6 = call i16 @llvm.ctlz.i16(i16 %ld6, i1 -1)
   %ctlz7 = call i16 @llvm.ctlz.i16(i16 %ld7, i1 -1)
-  store i16 %ctlz0, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
-  store i16 %ctlz1, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
-  store i16 %ctlz2, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
-  store i16 %ctlz3, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
-  store i16 %ctlz4, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
-  store i16 %ctlz5, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
-  store i16 %ctlz6, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
-  store i16 %ctlz7, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+  store i16 %ctlz0, ptr @dst16, align 2
+  store i16 %ctlz1, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
+  store i16 %ctlz2, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
+  store i16 %ctlz3, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
+  store i16 %ctlz4, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
+  store i16 %ctlz5, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
+  store i16 %ctlz6, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
+  store i16 %ctlz7, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
   ret void
 }
 
 define void @ctlz_undef_16i16() #0 {
 ; SSE-LABEL: @ctlz_undef_16i16(
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
 ; SSE-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[TMP1]], i1 true)
-; SSE-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
+; SSE-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
 ; SSE-NEXT:    [[TMP4:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[TMP3]], i1 true)
-; SSE-NEXT:    store <8 x i16> [[TMP4]], <8 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP4]], ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @ctlz_undef_16i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([16 x i16]* @src16 to <16 x i16>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 2
 ; AVX-NEXT:    [[TMP2:%.*]] = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> [[TMP1]], i1 true)
-; AVX-NEXT:    store <16 x i16> [[TMP2]], <16 x i16>* bitcast ([16 x i16]* @dst16 to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP2]], ptr @dst16, align 2
 ; AVX-NEXT:    ret void
 ;
-  %ld0  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  0), align 2
-  %ld1  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  1), align 2
-  %ld2  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  2), align 2
-  %ld3  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  3), align 2
-  %ld4  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  4), align 2
-  %ld5  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  5), align 2
-  %ld6  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  6), align 2
-  %ld7  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  7), align 2
-  %ld8  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  8), align 2
-  %ld9  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  9), align 2
-  %ld10 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
-  %ld11 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
-  %ld12 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
-  %ld13 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
-  %ld14 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
-  %ld15 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
+  %ld0  = load i16, ptr @src16, align 2
+  %ld1  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  1), align 2
+  %ld2  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  2), align 2
+  %ld3  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  3), align 2
+  %ld4  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  4), align 2
+  %ld5  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  5), align 2
+  %ld6  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  6), align 2
+  %ld7  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  7), align 2
+  %ld8  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  8), align 2
+  %ld9  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  9), align 2
+  %ld10 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 10), align 2
+  %ld11 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 11), align 2
+  %ld12 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 12), align 2
+  %ld13 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 13), align 2
+  %ld14 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 14), align 2
+  %ld15 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 15), align 2
   %ctlz0  = call i16 @llvm.ctlz.i16(i16 %ld0, i1 -1)
   %ctlz1  = call i16 @llvm.ctlz.i16(i16 %ld1, i1 -1)
   %ctlz2  = call i16 @llvm.ctlz.i16(i16 %ld2, i1 -1)
@@ -822,48 +822,48 @@ define void @ctlz_undef_16i16() #0 {
   %ctlz13 = call i16 @llvm.ctlz.i16(i16 %ld13, i1 -1)
   %ctlz14 = call i16 @llvm.ctlz.i16(i16 %ld14, i1 -1)
   %ctlz15 = call i16 @llvm.ctlz.i16(i16 %ld15, i1 -1)
-  store i16 %ctlz0 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  0), align 2
-  store i16 %ctlz1 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  1), align 2
-  store i16 %ctlz2 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  2), align 2
-  store i16 %ctlz3 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  3), align 2
-  store i16 %ctlz4 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  4), align 2
-  store i16 %ctlz5 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  5), align 2
-  store i16 %ctlz6 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  6), align 2
-  store i16 %ctlz7 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  7), align 2
-  store i16 %ctlz8 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  8), align 2
-  store i16 %ctlz9 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  9), align 2
-  store i16 %ctlz10, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
-  store i16 %ctlz11, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
-  store i16 %ctlz12, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
-  store i16 %ctlz13, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
-  store i16 %ctlz14, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
-  store i16 %ctlz15, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
+  store i16 %ctlz0 , ptr @dst16, align 2
+  store i16 %ctlz1 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  1), align 2
+  store i16 %ctlz2 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  2), align 2
+  store i16 %ctlz3 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  3), align 2
+  store i16 %ctlz4 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  4), align 2
+  store i16 %ctlz5 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  5), align 2
+  store i16 %ctlz6 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  6), align 2
+  store i16 %ctlz7 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  7), align 2
+  store i16 %ctlz8 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  8), align 2
+  store i16 %ctlz9 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  9), align 2
+  store i16 %ctlz10, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 10), align 2
+  store i16 %ctlz11, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 11), align 2
+  store i16 %ctlz12, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 12), align 2
+  store i16 %ctlz13, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 13), align 2
+  store i16 %ctlz14, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 14), align 2
+  store i16 %ctlz15, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 15), align 2
   ret void
 }
 
 define void @ctlz_undef_16i8() #0 {
 ; CHECK-LABEL: @ctlz_undef_16i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([32 x i8]* @src8 to <16 x i8>*), align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[TMP1]], i1 true)
-; CHECK-NEXT:    store <16 x i8> [[TMP2]], <16 x i8>* bitcast ([32 x i8]* @dst8 to <16 x i8>*), align 1
+; CHECK-NEXT:    store <16 x i8> [[TMP2]], ptr @dst8, align 1
 ; CHECK-NEXT:    ret void
 ;
-  %ld0  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  0), align 1
-  %ld1  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  1), align 1
-  %ld2  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  2), align 1
-  %ld3  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  3), align 1
-  %ld4  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  4), align 1
-  %ld5  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  5), align 1
-  %ld6  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  6), align 1
-  %ld7  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  7), align 1
-  %ld8  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  8), align 1
-  %ld9  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  9), align 1
-  %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
-  %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
-  %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
-  %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
-  %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
-  %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+  %ld0  = load i8, ptr @src8, align 1
+  %ld1  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  1), align 1
+  %ld2  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  2), align 1
+  %ld3  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  3), align 1
+  %ld4  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  4), align 1
+  %ld5  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  5), align 1
+  %ld6  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  6), align 1
+  %ld7  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  7), align 1
+  %ld8  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  8), align 1
+  %ld9  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  9), align 1
+  %ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
+  %ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
+  %ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
+  %ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
+  %ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
+  %ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
   %ctlz0  = call i8 @llvm.ctlz.i8(i8 %ld0, i1 -1)
   %ctlz1  = call i8 @llvm.ctlz.i8(i8 %ld1, i1 -1)
   %ctlz2  = call i8 @llvm.ctlz.i8(i8 %ld2, i1 -1)
@@ -880,73 +880,73 @@ define void @ctlz_undef_16i8() #0 {
   %ctlz13 = call i8 @llvm.ctlz.i8(i8 %ld13, i1 -1)
   %ctlz14 = call i8 @llvm.ctlz.i8(i8 %ld14, i1 -1)
   %ctlz15 = call i8 @llvm.ctlz.i8(i8 %ld15, i1 -1)
-  store i8 %ctlz0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  0), align 1
-  store i8 %ctlz1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  1), align 1
-  store i8 %ctlz2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  2), align 1
-  store i8 %ctlz3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  3), align 1
-  store i8 %ctlz4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  4), align 1
-  store i8 %ctlz5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  5), align 1
-  store i8 %ctlz6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  6), align 1
-  store i8 %ctlz7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  7), align 1
-  store i8 %ctlz8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  8), align 1
-  store i8 %ctlz9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  9), align 1
-  store i8 %ctlz10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
-  store i8 %ctlz11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
-  store i8 %ctlz12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
-  store i8 %ctlz13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
-  store i8 %ctlz14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
-  store i8 %ctlz15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+  store i8 %ctlz0 , ptr @dst8, align 1
+  store i8 %ctlz1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  1), align 1
+  store i8 %ctlz2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  2), align 1
+  store i8 %ctlz3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  3), align 1
+  store i8 %ctlz4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  4), align 1
+  store i8 %ctlz5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  5), align 1
+  store i8 %ctlz6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  6), align 1
+  store i8 %ctlz7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  7), align 1
+  store i8 %ctlz8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  8), align 1
+  store i8 %ctlz9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  9), align 1
+  store i8 %ctlz10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
+  store i8 %ctlz11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
+  store i8 %ctlz12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
+  store i8 %ctlz13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
+  store i8 %ctlz14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
+  store i8 %ctlz15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
   ret void
 }
 
 define void @ctlz_undef_32i8() #0 {
 ; SSE-LABEL: @ctlz_undef_32i8(
-; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([32 x i8]* @src8 to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
 ; SSE-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[TMP1]], i1 true)
-; SSE-NEXT:    store <16 x i8> [[TMP2]], <16 x i8>* bitcast ([32 x i8]* @dst8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP2]], ptr @dst8, align 1
+; SSE-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
 ; SSE-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[TMP3]], i1 true)
-; SSE-NEXT:    store <16 x i8> [[TMP4]], <16 x i8>* bitcast (i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP4]], ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @ctlz_undef_32i8(
-; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([32 x i8]* @src8 to <32 x i8>*), align 1
+; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @src8, align 1
 ; AVX-NEXT:    [[TMP2:%.*]] = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> [[TMP1]], i1 true)
-; AVX-NEXT:    store <32 x i8> [[TMP2]], <32 x i8>* bitcast ([32 x i8]* @dst8 to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP2]], ptr @dst8, align 1
 ; AVX-NEXT:    ret void
 ;
-  %ld0  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  0), align 1
-  %ld1  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  1), align 1
-  %ld2  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  2), align 1
-  %ld3  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  3), align 1
-  %ld4  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  4), align 1
-  %ld5  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  5), align 1
-  %ld6  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  6), align 1
-  %ld7  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  7), align 1
-  %ld8  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  8), align 1
-  %ld9  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  9), align 1
-  %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
-  %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
-  %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
-  %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
-  %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
-  %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
-  %ld16 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
-  %ld17 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
-  %ld18 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
-  %ld19 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
-  %ld20 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
-  %ld21 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
-  %ld22 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
-  %ld23 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
-  %ld24 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
-  %ld25 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
-  %ld26 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
-  %ld27 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
-  %ld28 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
-  %ld29 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
-  %ld30 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
-  %ld31 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
+  %ld0  = load i8, ptr @src8, align 1
+  %ld1  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  1), align 1
+  %ld2  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  2), align 1
+  %ld3  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  3), align 1
+  %ld4  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  4), align 1
+  %ld5  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  5), align 1
+  %ld6  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  6), align 1
+  %ld7  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  7), align 1
+  %ld8  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  8), align 1
+  %ld9  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  9), align 1
+  %ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
+  %ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
+  %ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
+  %ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
+  %ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
+  %ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
+  %ld16 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
+  %ld17 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 17), align 1
+  %ld18 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 18), align 1
+  %ld19 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 19), align 1
+  %ld20 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 20), align 1
+  %ld21 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 21), align 1
+  %ld22 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 22), align 1
+  %ld23 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 23), align 1
+  %ld24 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 24), align 1
+  %ld25 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 25), align 1
+  %ld26 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 26), align 1
+  %ld27 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 27), align 1
+  %ld28 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 28), align 1
+  %ld29 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 29), align 1
+  %ld30 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 30), align 1
+  %ld31 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 31), align 1
   %ctlz0  = call i8 @llvm.ctlz.i8(i8 %ld0, i1 -1)
   %ctlz1  = call i8 @llvm.ctlz.i8(i8 %ld1, i1 -1)
   %ctlz2  = call i8 @llvm.ctlz.i8(i8 %ld2, i1 -1)
@@ -979,38 +979,38 @@ define void @ctlz_undef_32i8() #0 {
   %ctlz29 = call i8 @llvm.ctlz.i8(i8 %ld29, i1 -1)
   %ctlz30 = call i8 @llvm.ctlz.i8(i8 %ld30, i1 -1)
   %ctlz31 = call i8 @llvm.ctlz.i8(i8 %ld31, i1 -1)
-  store i8 %ctlz0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  0), align 1
-  store i8 %ctlz1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  1), align 1
-  store i8 %ctlz2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  2), align 1
-  store i8 %ctlz3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  3), align 1
-  store i8 %ctlz4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  4), align 1
-  store i8 %ctlz5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  5), align 1
-  store i8 %ctlz6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  6), align 1
-  store i8 %ctlz7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  7), align 1
-  store i8 %ctlz8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  8), align 1
-  store i8 %ctlz9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  9), align 1
-  store i8 %ctlz10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
-  store i8 %ctlz11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
-  store i8 %ctlz12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
-  store i8 %ctlz13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
-  store i8 %ctlz14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
-  store i8 %ctlz15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
-  store i8 %ctlz16, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
-  store i8 %ctlz17, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
-  store i8 %ctlz18, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
-  store i8 %ctlz19, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
-  store i8 %ctlz20, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
-  store i8 %ctlz21, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
-  store i8 %ctlz22, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
-  store i8 %ctlz23, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
-  store i8 %ctlz24, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
-  store i8 %ctlz25, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
-  store i8 %ctlz26, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
-  store i8 %ctlz27, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
-  store i8 %ctlz28, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
-  store i8 %ctlz29, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
-  store i8 %ctlz30, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
-  store i8 %ctlz31, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
+  store i8 %ctlz0 , ptr @dst8, align 1
+  store i8 %ctlz1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  1), align 1
+  store i8 %ctlz2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  2), align 1
+  store i8 %ctlz3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  3), align 1
+  store i8 %ctlz4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  4), align 1
+  store i8 %ctlz5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  5), align 1
+  store i8 %ctlz6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  6), align 1
+  store i8 %ctlz7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  7), align 1
+  store i8 %ctlz8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  8), align 1
+  store i8 %ctlz9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  9), align 1
+  store i8 %ctlz10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
+  store i8 %ctlz11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
+  store i8 %ctlz12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
+  store i8 %ctlz13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
+  store i8 %ctlz14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
+  store i8 %ctlz15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
+  store i8 %ctlz16, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
+  store i8 %ctlz17, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 17), align 1
+  store i8 %ctlz18, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 18), align 1
+  store i8 %ctlz19, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 19), align 1
+  store i8 %ctlz20, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 20), align 1
+  store i8 %ctlz21, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 21), align 1
+  store i8 %ctlz22, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 22), align 1
+  store i8 %ctlz23, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 23), align 1
+  store i8 %ctlz24, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 24), align 1
+  store i8 %ctlz25, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 25), align 1
+  store i8 %ctlz26, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 26), align 1
+  store i8 %ctlz27, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 27), align 1
+  store i8 %ctlz28, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 28), align 1
+  store i8 %ctlz29, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 29), align 1
+  store i8 %ctlz30, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 30), align 1
+  store i8 %ctlz31, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 31), align 1
   ret void
 }
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/ctpop.ll b/llvm/test/Transforms/SLPVectorizer/X86/ctpop.ll
index 8c37909be8ed0..aaf640b97721c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/ctpop.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/ctpop.ll
@@ -22,167 +22,167 @@ declare  i8 @llvm.ctpop.i8(i8)
 
 define void @ctpop_2i64() #0 {
 ; SSE2-LABEL: @ctpop_2i64(
-; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 8
+; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 8
 ; SSE2-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[TMP1]])
-; SSE2-NEXT:    store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 8
+; SSE2-NEXT:    store <2 x i64> [[TMP2]], ptr @dst64, align 8
 ; SSE2-NEXT:    ret void
 ;
 ; SSE42-LABEL: @ctpop_2i64(
-; SSE42-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-; SSE42-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; SSE42-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 8
+; SSE42-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
 ; SSE42-NEXT:    [[CTPOP0:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD0]])
 ; SSE42-NEXT:    [[CTPOP1:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD1]])
-; SSE42-NEXT:    store i64 [[CTPOP0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-; SSE42-NEXT:    store i64 [[CTPOP1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; SSE42-NEXT:    store i64 [[CTPOP0]], ptr @dst64, align 8
+; SSE42-NEXT:    store i64 [[CTPOP1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
 ; SSE42-NEXT:    ret void
 ;
 ; AVX1-LABEL: @ctpop_2i64(
-; AVX1-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-; AVX1-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; AVX1-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 8
+; AVX1-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
 ; AVX1-NEXT:    [[CTPOP0:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD0]])
 ; AVX1-NEXT:    [[CTPOP1:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD1]])
-; AVX1-NEXT:    store i64 [[CTPOP0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-; AVX1-NEXT:    store i64 [[CTPOP1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; AVX1-NEXT:    store i64 [[CTPOP0]], ptr @dst64, align 8
+; AVX1-NEXT:    store i64 [[CTPOP1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @ctpop_2i64(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 8
+; AVX2-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 8
 ; AVX2-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[TMP1]])
-; AVX2-NEXT:    store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 8
+; AVX2-NEXT:    store <2 x i64> [[TMP2]], ptr @dst64, align 8
 ; AVX2-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-  %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+  %ld0 = load i64, ptr @src64, align 8
+  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
   %ctpop0 = call i64 @llvm.ctpop.i64(i64 %ld0)
   %ctpop1 = call i64 @llvm.ctpop.i64(i64 %ld1)
-  store i64 %ctpop0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-  store i64 %ctpop1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+  store i64 %ctpop0, ptr @dst64, align 8
+  store i64 %ctpop1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @ctpop_4i64() #0 {
 ; SSE2-LABEL: @ctpop_4i64(
-; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 4
+; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 4
 ; SSE2-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[TMP1]])
-; SSE2-NEXT:    store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 4
-; SSE2-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2) to <2 x i64>*), align 4
+; SSE2-NEXT:    store <2 x i64> [[TMP2]], ptr @dst64, align 4
+; SSE2-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
 ; SSE2-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[TMP3]])
-; SSE2-NEXT:    store <2 x i64> [[TMP4]], <2 x i64>* bitcast (i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2) to <2 x i64>*), align 4
+; SSE2-NEXT:    store <2 x i64> [[TMP4]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
 ; SSE2-NEXT:    ret void
 ;
 ; SSE42-LABEL: @ctpop_4i64(
-; SSE42-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
-; SSE42-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
-; SSE42-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
-; SSE42-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+; SSE42-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 4
+; SSE42-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
+; SSE42-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
+; SSE42-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
 ; SSE42-NEXT:    [[CTPOP0:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD0]])
 ; SSE42-NEXT:    [[CTPOP1:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD1]])
 ; SSE42-NEXT:    [[CTPOP2:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD2]])
 ; SSE42-NEXT:    [[CTPOP3:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD3]])
-; SSE42-NEXT:    store i64 [[CTPOP0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
-; SSE42-NEXT:    store i64 [[CTPOP1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
-; SSE42-NEXT:    store i64 [[CTPOP2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
-; SSE42-NEXT:    store i64 [[CTPOP3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+; SSE42-NEXT:    store i64 [[CTPOP0]], ptr @dst64, align 4
+; SSE42-NEXT:    store i64 [[CTPOP1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
+; SSE42-NEXT:    store i64 [[CTPOP2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
+; SSE42-NEXT:    store i64 [[CTPOP3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
 ; SSE42-NEXT:    ret void
 ;
 ; AVX1-LABEL: @ctpop_4i64(
-; AVX1-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
-; AVX1-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
-; AVX1-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
-; AVX1-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+; AVX1-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 4
+; AVX1-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
+; AVX1-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
+; AVX1-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
 ; AVX1-NEXT:    [[CTPOP0:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD0]])
 ; AVX1-NEXT:    [[CTPOP1:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD1]])
 ; AVX1-NEXT:    [[CTPOP2:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD2]])
 ; AVX1-NEXT:    [[CTPOP3:%.*]] = call i64 @llvm.ctpop.i64(i64 [[LD3]])
-; AVX1-NEXT:    store i64 [[CTPOP0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
-; AVX1-NEXT:    store i64 [[CTPOP1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
-; AVX1-NEXT:    store i64 [[CTPOP2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
-; AVX1-NEXT:    store i64 [[CTPOP3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+; AVX1-NEXT:    store i64 [[CTPOP0]], ptr @dst64, align 4
+; AVX1-NEXT:    store i64 [[CTPOP1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
+; AVX1-NEXT:    store i64 [[CTPOP2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
+; AVX1-NEXT:    store i64 [[CTPOP3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @ctpop_4i64(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([4 x i64]* @src64 to <4 x i64>*), align 4
+; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 4
 ; AVX2-NEXT:    [[TMP2:%.*]] = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> [[TMP1]])
-; AVX2-NEXT:    store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([4 x i64]* @dst64 to <4 x i64>*), align 4
+; AVX2-NEXT:    store <4 x i64> [[TMP2]], ptr @dst64, align 4
 ; AVX2-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
-  %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
-  %ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
-  %ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+  %ld0 = load i64, ptr @src64, align 4
+  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
+  %ld2 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
+  %ld3 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
   %ctpop0 = call i64 @llvm.ctpop.i64(i64 %ld0)
   %ctpop1 = call i64 @llvm.ctpop.i64(i64 %ld1)
   %ctpop2 = call i64 @llvm.ctpop.i64(i64 %ld2)
   %ctpop3 = call i64 @llvm.ctpop.i64(i64 %ld3)
-  store i64 %ctpop0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
-  store i64 %ctpop1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
-  store i64 %ctpop2, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
-  store i64 %ctpop3, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+  store i64 %ctpop0, ptr @dst64, align 4
+  store i64 %ctpop1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
+  store i64 %ctpop2, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
+  store i64 %ctpop3, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
   ret void
 }
 
 define void @ctpop_4i32() #0 {
 ; SSE2-LABEL: @ctpop_4i32(
-; SSE2-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
+; SSE2-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
 ; SSE2-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> [[TMP1]])
-; SSE2-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
+; SSE2-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 4
 ; SSE2-NEXT:    ret void
 ;
 ; SSE42-LABEL: @ctpop_4i32(
-; SSE42-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
-; SSE42-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
-; SSE42-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
-; SSE42-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+; SSE42-NEXT:    [[LD0:%.*]] = load i32, ptr @src32, align 4
+; SSE42-NEXT:    [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
+; SSE42-NEXT:    [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
+; SSE42-NEXT:    [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
 ; SSE42-NEXT:    [[CTPOP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD0]])
 ; SSE42-NEXT:    [[CTPOP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD1]])
 ; SSE42-NEXT:    [[CTPOP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD2]])
 ; SSE42-NEXT:    [[CTPOP3:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD3]])
-; SSE42-NEXT:    store i32 [[CTPOP0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
-; SSE42-NEXT:    store i32 [[CTPOP1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
-; SSE42-NEXT:    store i32 [[CTPOP2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
-; SSE42-NEXT:    store i32 [[CTPOP3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+; SSE42-NEXT:    store i32 [[CTPOP0]], ptr @dst32, align 4
+; SSE42-NEXT:    store i32 [[CTPOP1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
+; SSE42-NEXT:    store i32 [[CTPOP2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
+; SSE42-NEXT:    store i32 [[CTPOP3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
 ; SSE42-NEXT:    ret void
 ;
 ; AVX-LABEL: @ctpop_4i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> [[TMP1]])
-; AVX-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
+; AVX-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 4
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
-  %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
-  %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
-  %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+  %ld0 = load i32, ptr @src32, align 4
+  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
+  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
   %ctpop0 = call i32 @llvm.ctpop.i32(i32 %ld0)
   %ctpop1 = call i32 @llvm.ctpop.i32(i32 %ld1)
   %ctpop2 = call i32 @llvm.ctpop.i32(i32 %ld2)
   %ctpop3 = call i32 @llvm.ctpop.i32(i32 %ld3)
-  store i32 %ctpop0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
-  store i32 %ctpop1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
-  store i32 %ctpop2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
-  store i32 %ctpop3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+  store i32 %ctpop0, ptr @dst32, align 4
+  store i32 %ctpop1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
+  store i32 %ctpop2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
+  store i32 %ctpop3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @ctpop_8i32() #0 {
 ; SSE2-LABEL: @ctpop_8i32(
-; SSE2-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 2
+; SSE2-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 2
 ; SSE2-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> [[TMP1]])
-; SSE2-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 2
-; SSE2-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 2
+; SSE2-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 2
+; SSE2-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
 ; SSE2-NEXT:    [[TMP4:%.*]] = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> [[TMP3]])
-; SSE2-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 2
+; SSE2-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
 ; SSE2-NEXT:    ret void
 ;
 ; SSE42-LABEL: @ctpop_8i32(
-; SSE42-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
-; SSE42-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
-; SSE42-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
-; SSE42-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
-; SSE42-NEXT:    [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
-; SSE42-NEXT:    [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
-; SSE42-NEXT:    [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
-; SSE42-NEXT:    [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+; SSE42-NEXT:    [[LD0:%.*]] = load i32, ptr @src32, align 2
+; SSE42-NEXT:    [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
+; SSE42-NEXT:    [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
+; SSE42-NEXT:    [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
+; SSE42-NEXT:    [[LD4:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
+; SSE42-NEXT:    [[LD5:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
+; SSE42-NEXT:    [[LD6:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
+; SSE42-NEXT:    [[LD7:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
 ; SSE42-NEXT:    [[CTPOP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD0]])
 ; SSE42-NEXT:    [[CTPOP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD1]])
 ; SSE42-NEXT:    [[CTPOP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD2]])
@@ -191,30 +191,30 @@ define void @ctpop_8i32() #0 {
 ; SSE42-NEXT:    [[CTPOP5:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD5]])
 ; SSE42-NEXT:    [[CTPOP6:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD6]])
 ; SSE42-NEXT:    [[CTPOP7:%.*]] = call i32 @llvm.ctpop.i32(i32 [[LD7]])
-; SSE42-NEXT:    store i32 [[CTPOP0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
-; SSE42-NEXT:    store i32 [[CTPOP1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
-; SSE42-NEXT:    store i32 [[CTPOP2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
-; SSE42-NEXT:    store i32 [[CTPOP3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
-; SSE42-NEXT:    store i32 [[CTPOP4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
-; SSE42-NEXT:    store i32 [[CTPOP5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
-; SSE42-NEXT:    store i32 [[CTPOP6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
-; SSE42-NEXT:    store i32 [[CTPOP7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+; SSE42-NEXT:    store i32 [[CTPOP0]], ptr @dst32, align 2
+; SSE42-NEXT:    store i32 [[CTPOP1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
+; SSE42-NEXT:    store i32 [[CTPOP2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
+; SSE42-NEXT:    store i32 [[CTPOP3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
+; SSE42-NEXT:    store i32 [[CTPOP4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
+; SSE42-NEXT:    store i32 [[CTPOP5]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
+; SSE42-NEXT:    store i32 [[CTPOP6]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
+; SSE42-NEXT:    store i32 [[CTPOP7]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
 ; SSE42-NEXT:    ret void
 ;
 ; AVX-LABEL: @ctpop_8i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
 ; AVX-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> [[TMP1]])
-; AVX-NEXT:    store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
+; AVX-NEXT:    store <8 x i32> [[TMP2]], ptr @dst32, align 2
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
-  %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
-  %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
-  %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
-  %ld4 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
-  %ld5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
-  %ld6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
-  %ld7 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+  %ld0 = load i32, ptr @src32, align 2
+  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
+  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
+  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
+  %ld4 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
+  %ld5 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
+  %ld6 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
+  %ld7 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
   %ctpop0 = call i32 @llvm.ctpop.i32(i32 %ld0)
   %ctpop1 = call i32 @llvm.ctpop.i32(i32 %ld1)
   %ctpop2 = call i32 @llvm.ctpop.i32(i32 %ld2)
@@ -223,32 +223,32 @@ define void @ctpop_8i32() #0 {
   %ctpop5 = call i32 @llvm.ctpop.i32(i32 %ld5)
   %ctpop6 = call i32 @llvm.ctpop.i32(i32 %ld6)
   %ctpop7 = call i32 @llvm.ctpop.i32(i32 %ld7)
-  store i32 %ctpop0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
-  store i32 %ctpop1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
-  store i32 %ctpop2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
-  store i32 %ctpop3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
-  store i32 %ctpop4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
-  store i32 %ctpop5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
-  store i32 %ctpop6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
-  store i32 %ctpop7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+  store i32 %ctpop0, ptr @dst32, align 2
+  store i32 %ctpop1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
+  store i32 %ctpop2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
+  store i32 %ctpop3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
+  store i32 %ctpop4, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
+  store i32 %ctpop5, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
+  store i32 %ctpop6, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
+  store i32 %ctpop7, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
   ret void
 }
 
 define void @ctpop_8i16() #0 {
 ; CHECK-LABEL: @ctpop_8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> [[TMP1]])
-; CHECK-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
-  %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
-  %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
-  %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
-  %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
-  %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
-  %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
-  %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+  %ld0 = load i16, ptr @src16, align 2
+  %ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
+  %ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
+  %ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
+  %ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
+  %ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
+  %ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
+  %ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
   %ctpop0 = call i16 @llvm.ctpop.i16(i16 %ld0)
   %ctpop1 = call i16 @llvm.ctpop.i16(i16 %ld1)
   %ctpop2 = call i16 @llvm.ctpop.i16(i16 %ld2)
@@ -257,49 +257,49 @@ define void @ctpop_8i16() #0 {
   %ctpop5 = call i16 @llvm.ctpop.i16(i16 %ld5)
   %ctpop6 = call i16 @llvm.ctpop.i16(i16 %ld6)
   %ctpop7 = call i16 @llvm.ctpop.i16(i16 %ld7)
-  store i16 %ctpop0, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
-  store i16 %ctpop1, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
-  store i16 %ctpop2, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
-  store i16 %ctpop3, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
-  store i16 %ctpop4, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
-  store i16 %ctpop5, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
-  store i16 %ctpop6, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
-  store i16 %ctpop7, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+  store i16 %ctpop0, ptr @dst16, align 2
+  store i16 %ctpop1, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
+  store i16 %ctpop2, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
+  store i16 %ctpop3, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
+  store i16 %ctpop4, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
+  store i16 %ctpop5, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
+  store i16 %ctpop6, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
+  store i16 %ctpop7, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
   ret void
 }
 
 define void @ctpop_16i16() #0 {
 ; SSE-LABEL: @ctpop_16i16(
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
 ; SSE-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> [[TMP1]])
-; SSE-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
+; SSE-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
 ; SSE-NEXT:    [[TMP4:%.*]] = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> [[TMP3]])
-; SSE-NEXT:    store <8 x i16> [[TMP4]], <8 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP4]], ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @ctpop_16i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([16 x i16]* @src16 to <16 x i16>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 2
 ; AVX-NEXT:    [[TMP2:%.*]] = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> [[TMP1]])
-; AVX-NEXT:    store <16 x i16> [[TMP2]], <16 x i16>* bitcast ([16 x i16]* @dst16 to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP2]], ptr @dst16, align 2
 ; AVX-NEXT:    ret void
 ;
-  %ld0  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  0), align 2
-  %ld1  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  1), align 2
-  %ld2  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  2), align 2
-  %ld3  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  3), align 2
-  %ld4  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  4), align 2
-  %ld5  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  5), align 2
-  %ld6  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  6), align 2
-  %ld7  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  7), align 2
-  %ld8  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  8), align 2
-  %ld9  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  9), align 2
-  %ld10 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
-  %ld11 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
-  %ld12 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
-  %ld13 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
-  %ld14 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
-  %ld15 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
+  %ld0  = load i16, ptr @src16, align 2
+  %ld1  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  1), align 2
+  %ld2  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  2), align 2
+  %ld3  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  3), align 2
+  %ld4  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  4), align 2
+  %ld5  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  5), align 2
+  %ld6  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  6), align 2
+  %ld7  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  7), align 2
+  %ld8  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  8), align 2
+  %ld9  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  9), align 2
+  %ld10 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 10), align 2
+  %ld11 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 11), align 2
+  %ld12 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 12), align 2
+  %ld13 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 13), align 2
+  %ld14 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 14), align 2
+  %ld15 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 15), align 2
   %ctpop0  = call i16 @llvm.ctpop.i16(i16 %ld0)
   %ctpop1  = call i16 @llvm.ctpop.i16(i16 %ld1)
   %ctpop2  = call i16 @llvm.ctpop.i16(i16 %ld2)
@@ -316,48 +316,48 @@ define void @ctpop_16i16() #0 {
   %ctpop13 = call i16 @llvm.ctpop.i16(i16 %ld13)
   %ctpop14 = call i16 @llvm.ctpop.i16(i16 %ld14)
   %ctpop15 = call i16 @llvm.ctpop.i16(i16 %ld15)
-  store i16 %ctpop0 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  0), align 2
-  store i16 %ctpop1 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  1), align 2
-  store i16 %ctpop2 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  2), align 2
-  store i16 %ctpop3 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  3), align 2
-  store i16 %ctpop4 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  4), align 2
-  store i16 %ctpop5 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  5), align 2
-  store i16 %ctpop6 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  6), align 2
-  store i16 %ctpop7 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  7), align 2
-  store i16 %ctpop8 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  8), align 2
-  store i16 %ctpop9 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  9), align 2
-  store i16 %ctpop10, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
-  store i16 %ctpop11, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
-  store i16 %ctpop12, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
-  store i16 %ctpop13, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
-  store i16 %ctpop14, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
-  store i16 %ctpop15, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
+  store i16 %ctpop0 , ptr @dst16, align 2
+  store i16 %ctpop1 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  1), align 2
+  store i16 %ctpop2 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  2), align 2
+  store i16 %ctpop3 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  3), align 2
+  store i16 %ctpop4 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  4), align 2
+  store i16 %ctpop5 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  5), align 2
+  store i16 %ctpop6 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  6), align 2
+  store i16 %ctpop7 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  7), align 2
+  store i16 %ctpop8 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  8), align 2
+  store i16 %ctpop9 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  9), align 2
+  store i16 %ctpop10, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 10), align 2
+  store i16 %ctpop11, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 11), align 2
+  store i16 %ctpop12, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 12), align 2
+  store i16 %ctpop13, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 13), align 2
+  store i16 %ctpop14, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 14), align 2
+  store i16 %ctpop15, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 15), align 2
   ret void
 }
 
 define void @ctpop_16i8() #0 {
 ; CHECK-LABEL: @ctpop_16i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([32 x i8]* @src8 to <16 x i8>*), align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[TMP1]])
-; CHECK-NEXT:    store <16 x i8> [[TMP2]], <16 x i8>* bitcast ([32 x i8]* @dst8 to <16 x i8>*), align 1
+; CHECK-NEXT:    store <16 x i8> [[TMP2]], ptr @dst8, align 1
 ; CHECK-NEXT:    ret void
 ;
-  %ld0  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  0), align 1
-  %ld1  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  1), align 1
-  %ld2  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  2), align 1
-  %ld3  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  3), align 1
-  %ld4  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  4), align 1
-  %ld5  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  5), align 1
-  %ld6  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  6), align 1
-  %ld7  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  7), align 1
-  %ld8  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  8), align 1
-  %ld9  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  9), align 1
-  %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
-  %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
-  %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
-  %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
-  %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
-  %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+  %ld0  = load i8, ptr @src8, align 1
+  %ld1  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  1), align 1
+  %ld2  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  2), align 1
+  %ld3  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  3), align 1
+  %ld4  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  4), align 1
+  %ld5  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  5), align 1
+  %ld6  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  6), align 1
+  %ld7  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  7), align 1
+  %ld8  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  8), align 1
+  %ld9  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  9), align 1
+  %ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
+  %ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
+  %ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
+  %ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
+  %ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
+  %ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
   %ctpop0  = call i8 @llvm.ctpop.i8(i8 %ld0)
   %ctpop1  = call i8 @llvm.ctpop.i8(i8 %ld1)
   %ctpop2  = call i8 @llvm.ctpop.i8(i8 %ld2)
@@ -374,73 +374,73 @@ define void @ctpop_16i8() #0 {
   %ctpop13 = call i8 @llvm.ctpop.i8(i8 %ld13)
   %ctpop14 = call i8 @llvm.ctpop.i8(i8 %ld14)
   %ctpop15 = call i8 @llvm.ctpop.i8(i8 %ld15)
-  store i8 %ctpop0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  0), align 1
-  store i8 %ctpop1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  1), align 1
-  store i8 %ctpop2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  2), align 1
-  store i8 %ctpop3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  3), align 1
-  store i8 %ctpop4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  4), align 1
-  store i8 %ctpop5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  5), align 1
-  store i8 %ctpop6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  6), align 1
-  store i8 %ctpop7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  7), align 1
-  store i8 %ctpop8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  8), align 1
-  store i8 %ctpop9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  9), align 1
-  store i8 %ctpop10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
-  store i8 %ctpop11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
-  store i8 %ctpop12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
-  store i8 %ctpop13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
-  store i8 %ctpop14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
-  store i8 %ctpop15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+  store i8 %ctpop0 , ptr @dst8, align 1
+  store i8 %ctpop1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  1), align 1
+  store i8 %ctpop2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  2), align 1
+  store i8 %ctpop3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  3), align 1
+  store i8 %ctpop4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  4), align 1
+  store i8 %ctpop5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  5), align 1
+  store i8 %ctpop6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  6), align 1
+  store i8 %ctpop7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  7), align 1
+  store i8 %ctpop8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  8), align 1
+  store i8 %ctpop9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  9), align 1
+  store i8 %ctpop10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
+  store i8 %ctpop11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
+  store i8 %ctpop12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
+  store i8 %ctpop13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
+  store i8 %ctpop14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
+  store i8 %ctpop15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
   ret void
 }
 
 define void @ctpop_32i8() #0 {
 ; SSE-LABEL: @ctpop_32i8(
-; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([32 x i8]* @src8 to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
 ; SSE-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[TMP1]])
-; SSE-NEXT:    store <16 x i8> [[TMP2]], <16 x i8>* bitcast ([32 x i8]* @dst8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP2]], ptr @dst8, align 1
+; SSE-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
 ; SSE-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[TMP3]])
-; SSE-NEXT:    store <16 x i8> [[TMP4]], <16 x i8>* bitcast (i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP4]], ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @ctpop_32i8(
-; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([32 x i8]* @src8 to <32 x i8>*), align 1
+; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @src8, align 1
 ; AVX-NEXT:    [[TMP2:%.*]] = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> [[TMP1]])
-; AVX-NEXT:    store <32 x i8> [[TMP2]], <32 x i8>* bitcast ([32 x i8]* @dst8 to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP2]], ptr @dst8, align 1
 ; AVX-NEXT:    ret void
 ;
-  %ld0  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  0), align 1
-  %ld1  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  1), align 1
-  %ld2  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  2), align 1
-  %ld3  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  3), align 1
-  %ld4  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  4), align 1
-  %ld5  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  5), align 1
-  %ld6  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  6), align 1
-  %ld7  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  7), align 1
-  %ld8  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  8), align 1
-  %ld9  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  9), align 1
-  %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
-  %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
-  %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
-  %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
-  %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
-  %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
-  %ld16 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
-  %ld17 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
-  %ld18 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
-  %ld19 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
-  %ld20 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
-  %ld21 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
-  %ld22 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
-  %ld23 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
-  %ld24 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
-  %ld25 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
-  %ld26 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
-  %ld27 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
-  %ld28 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
-  %ld29 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
-  %ld30 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
-  %ld31 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
+  %ld0  = load i8, ptr @src8, align 1
+  %ld1  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  1), align 1
+  %ld2  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  2), align 1
+  %ld3  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  3), align 1
+  %ld4  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  4), align 1
+  %ld5  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  5), align 1
+  %ld6  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  6), align 1
+  %ld7  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  7), align 1
+  %ld8  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  8), align 1
+  %ld9  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  9), align 1
+  %ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
+  %ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
+  %ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
+  %ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
+  %ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
+  %ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
+  %ld16 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
+  %ld17 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 17), align 1
+  %ld18 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 18), align 1
+  %ld19 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 19), align 1
+  %ld20 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 20), align 1
+  %ld21 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 21), align 1
+  %ld22 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 22), align 1
+  %ld23 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 23), align 1
+  %ld24 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 24), align 1
+  %ld25 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 25), align 1
+  %ld26 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 26), align 1
+  %ld27 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 27), align 1
+  %ld28 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 28), align 1
+  %ld29 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 29), align 1
+  %ld30 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 30), align 1
+  %ld31 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 31), align 1
   %ctpop0  = call i8 @llvm.ctpop.i8(i8 %ld0)
   %ctpop1  = call i8 @llvm.ctpop.i8(i8 %ld1)
   %ctpop2  = call i8 @llvm.ctpop.i8(i8 %ld2)
@@ -473,38 +473,38 @@ define void @ctpop_32i8() #0 {
   %ctpop29 = call i8 @llvm.ctpop.i8(i8 %ld29)
   %ctpop30 = call i8 @llvm.ctpop.i8(i8 %ld30)
   %ctpop31 = call i8 @llvm.ctpop.i8(i8 %ld31)
-  store i8 %ctpop0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  0), align 1
-  store i8 %ctpop1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  1), align 1
-  store i8 %ctpop2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  2), align 1
-  store i8 %ctpop3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  3), align 1
-  store i8 %ctpop4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  4), align 1
-  store i8 %ctpop5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  5), align 1
-  store i8 %ctpop6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  6), align 1
-  store i8 %ctpop7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  7), align 1
-  store i8 %ctpop8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  8), align 1
-  store i8 %ctpop9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  9), align 1
-  store i8 %ctpop10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
-  store i8 %ctpop11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
-  store i8 %ctpop12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
-  store i8 %ctpop13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
-  store i8 %ctpop14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
-  store i8 %ctpop15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
-  store i8 %ctpop16, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
-  store i8 %ctpop17, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
-  store i8 %ctpop18, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
-  store i8 %ctpop19, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
-  store i8 %ctpop20, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
-  store i8 %ctpop21, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
-  store i8 %ctpop22, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
-  store i8 %ctpop23, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
-  store i8 %ctpop24, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
-  store i8 %ctpop25, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
-  store i8 %ctpop26, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
-  store i8 %ctpop27, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
-  store i8 %ctpop28, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
-  store i8 %ctpop29, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
-  store i8 %ctpop30, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
-  store i8 %ctpop31, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
+  store i8 %ctpop0 , ptr @dst8, align 1
+  store i8 %ctpop1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  1), align 1
+  store i8 %ctpop2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  2), align 1
+  store i8 %ctpop3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  3), align 1
+  store i8 %ctpop4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  4), align 1
+  store i8 %ctpop5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  5), align 1
+  store i8 %ctpop6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  6), align 1
+  store i8 %ctpop7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  7), align 1
+  store i8 %ctpop8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  8), align 1
+  store i8 %ctpop9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  9), align 1
+  store i8 %ctpop10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
+  store i8 %ctpop11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
+  store i8 %ctpop12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
+  store i8 %ctpop13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
+  store i8 %ctpop14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
+  store i8 %ctpop15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
+  store i8 %ctpop16, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
+  store i8 %ctpop17, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 17), align 1
+  store i8 %ctpop18, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 18), align 1
+  store i8 %ctpop19, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 19), align 1
+  store i8 %ctpop20, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 20), align 1
+  store i8 %ctpop21, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 21), align 1
+  store i8 %ctpop22, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 22), align 1
+  store i8 %ctpop23, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 23), align 1
+  store i8 %ctpop24, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 24), align 1
+  store i8 %ctpop25, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 25), align 1
+  store i8 %ctpop26, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 26), align 1
+  store i8 %ctpop27, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 27), align 1
+  store i8 %ctpop28, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 28), align 1
+  store i8 %ctpop29, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 29), align 1
+  store i8 %ctpop30, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 30), align 1
+  store i8 %ctpop31, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 31), align 1
   ret void
 }
 

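The same mechanical rewrite repeats in every hunk above and below: under
opaque pointers all pointer values share the single type ptr, so the
bitcast constant expressions that reinterpreted a typed global (for
example [4 x i64]* as <2 x i64>*) become no-ops and are dropped, and a
getelementptr whose indices are all zero folds away to the bare global.
A minimal sketch of the shape of the change, using a hypothetical global
@g rather than one taken from this patch:

  @g = global [4 x i64] zeroinitializer, align 8

  -  %v = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @g to <2 x i64>*), align 8
  +  %v = load <2 x i64>, ptr @g, align 8

Scalar accesses at offset 0 collapse the same way: getelementptr
inbounds ([4 x i64], [4 x i64]* @g, i32 0, i64 0) simplifies to plain
@g, which is why the first lane of each test now loads from or stores to
the global directly, while the nonzero offsets keep their (now
ptr-typed) getelementptr constant expressions.
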
diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/cttz.ll b/llvm/test/Transforms/SLPVectorizer/X86/cttz.ll
index 1c57a51b7af3e..22f0c3f936509 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cttz.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cttz.ll
@@ -27,152 +27,152 @@ declare  i8 @llvm.cttz.i8(i8, i1)
 
 define void @cttz_2i64() #0 {
 ; SSE-LABEL: @cttz_2i64(
-; SSE-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-; SSE-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 8
+; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
 ; SSE-NEXT:    [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false)
 ; SSE-NEXT:    [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false)
-; SSE-NEXT:    store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-; SSE-NEXT:    store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; SSE-NEXT:    store i64 [[CTTZ0]], ptr @dst64, align 8
+; SSE-NEXT:    store i64 [[CTTZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX1-LABEL: @cttz_2i64(
-; AVX1-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-; AVX1-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; AVX1-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 8
+; AVX1-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
 ; AVX1-NEXT:    [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false)
 ; AVX1-NEXT:    [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false)
-; AVX1-NEXT:    store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-; AVX1-NEXT:    store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; AVX1-NEXT:    store i64 [[CTTZ0]], ptr @dst64, align 8
+; AVX1-NEXT:    store i64 [[CTTZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @cttz_2i64(
-; AVX2-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-; AVX2-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; AVX2-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 8
+; AVX2-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
 ; AVX2-NEXT:    [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false)
 ; AVX2-NEXT:    [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false)
-; AVX2-NEXT:    store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-; AVX2-NEXT:    store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; AVX2-NEXT:    store i64 [[CTTZ0]], ptr @dst64, align 8
+; AVX2-NEXT:    store i64 [[CTTZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @cttz_2i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 8
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[TMP1]], i1 false)
-; AVX512-NEXT:    store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 8
+; AVX512-NEXT:    store <2 x i64> [[TMP2]], ptr @dst64, align 8
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-  %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+  %ld0 = load i64, ptr @src64, align 8
+  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
   %cttz0 = call i64 @llvm.cttz.i64(i64 %ld0, i1 0)
   %cttz1 = call i64 @llvm.cttz.i64(i64 %ld1, i1 0)
-  store i64 %cttz0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-  store i64 %cttz1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+  store i64 %cttz0, ptr @dst64, align 8
+  store i64 %cttz1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @cttz_4i64() #0 {
 ; SSE-LABEL: @cttz_4i64(
-; SSE-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
-; SSE-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
-; SSE-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
-; SSE-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 4
+; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
+; SSE-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
+; SSE-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
 ; SSE-NEXT:    [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false)
 ; SSE-NEXT:    [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false)
 ; SSE-NEXT:    [[CTTZ2:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD2]], i1 false)
 ; SSE-NEXT:    [[CTTZ3:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD3]], i1 false)
-; SSE-NEXT:    store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
-; SSE-NEXT:    store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
-; SSE-NEXT:    store i64 [[CTTZ2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
-; SSE-NEXT:    store i64 [[CTTZ3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+; SSE-NEXT:    store i64 [[CTTZ0]], ptr @dst64, align 4
+; SSE-NEXT:    store i64 [[CTTZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
+; SSE-NEXT:    store i64 [[CTTZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
+; SSE-NEXT:    store i64 [[CTTZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX1-LABEL: @cttz_4i64(
-; AVX1-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
-; AVX1-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
-; AVX1-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
-; AVX1-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+; AVX1-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 4
+; AVX1-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
+; AVX1-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
+; AVX1-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
 ; AVX1-NEXT:    [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false)
 ; AVX1-NEXT:    [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false)
 ; AVX1-NEXT:    [[CTTZ2:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD2]], i1 false)
 ; AVX1-NEXT:    [[CTTZ3:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD3]], i1 false)
-; AVX1-NEXT:    store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
-; AVX1-NEXT:    store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
-; AVX1-NEXT:    store i64 [[CTTZ2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
-; AVX1-NEXT:    store i64 [[CTTZ3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+; AVX1-NEXT:    store i64 [[CTTZ0]], ptr @dst64, align 4
+; AVX1-NEXT:    store i64 [[CTTZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
+; AVX1-NEXT:    store i64 [[CTTZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
+; AVX1-NEXT:    store i64 [[CTTZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @cttz_4i64(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([4 x i64]* @src64 to <4 x i64>*), align 4
+; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 4
 ; AVX2-NEXT:    [[TMP2:%.*]] = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> [[TMP1]], i1 false)
-; AVX2-NEXT:    store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([4 x i64]* @dst64 to <4 x i64>*), align 4
+; AVX2-NEXT:    store <4 x i64> [[TMP2]], ptr @dst64, align 4
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @cttz_4i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([4 x i64]* @src64 to <4 x i64>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 4
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> [[TMP1]], i1 false)
-; AVX512-NEXT:    store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([4 x i64]* @dst64 to <4 x i64>*), align 4
+; AVX512-NEXT:    store <4 x i64> [[TMP2]], ptr @dst64, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
-  %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
-  %ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
-  %ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+  %ld0 = load i64, ptr @src64, align 4
+  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
+  %ld2 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
+  %ld3 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
   %cttz0 = call i64 @llvm.cttz.i64(i64 %ld0, i1 0)
   %cttz1 = call i64 @llvm.cttz.i64(i64 %ld1, i1 0)
   %cttz2 = call i64 @llvm.cttz.i64(i64 %ld2, i1 0)
   %cttz3 = call i64 @llvm.cttz.i64(i64 %ld3, i1 0)
-  store i64 %cttz0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
-  store i64 %cttz1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
-  store i64 %cttz2, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
-  store i64 %cttz3, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+  store i64 %cttz0, ptr @dst64, align 4
+  store i64 %cttz1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
+  store i64 %cttz2, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
+  store i64 %cttz3, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
   ret void
 }
 
 define void @cttz_4i32() #0 {
 ; SSE-LABEL: @cttz_4i32(
-; SSE-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
-; SSE-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
-; SSE-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
-; SSE-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+; SSE-NEXT:    [[LD0:%.*]] = load i32, ptr @src32, align 4
+; SSE-NEXT:    [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
+; SSE-NEXT:    [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
+; SSE-NEXT:    [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
 ; SSE-NEXT:    [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 false)
 ; SSE-NEXT:    [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 false)
 ; SSE-NEXT:    [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 false)
 ; SSE-NEXT:    [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 false)
-; SSE-NEXT:    store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
-; SSE-NEXT:    store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
-; SSE-NEXT:    store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
-; SSE-NEXT:    store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+; SSE-NEXT:    store i32 [[CTTZ0]], ptr @dst32, align 4
+; SSE-NEXT:    store i32 [[CTTZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
+; SSE-NEXT:    store i32 [[CTTZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
+; SSE-NEXT:    store i32 [[CTTZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @cttz_4i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP1]], i1 false)
-; AVX-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
+; AVX-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 4
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
-  %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
-  %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
-  %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+  %ld0 = load i32, ptr @src32, align 4
+  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
+  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
   %cttz0 = call i32 @llvm.cttz.i32(i32 %ld0, i1 0)
   %cttz1 = call i32 @llvm.cttz.i32(i32 %ld1, i1 0)
   %cttz2 = call i32 @llvm.cttz.i32(i32 %ld2, i1 0)
   %cttz3 = call i32 @llvm.cttz.i32(i32 %ld3, i1 0)
-  store i32 %cttz0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
-  store i32 %cttz1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
-  store i32 %cttz2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
-  store i32 %cttz3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+  store i32 %cttz0, ptr @dst32, align 4
+  store i32 %cttz1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
+  store i32 %cttz2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
+  store i32 %cttz3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @cttz_8i32() #0 {
 ; SSE-LABEL: @cttz_8i32(
-; SSE-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
-; SSE-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
-; SSE-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
-; SSE-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
-; SSE-NEXT:    [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
-; SSE-NEXT:    [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
-; SSE-NEXT:    [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
-; SSE-NEXT:    [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+; SSE-NEXT:    [[LD0:%.*]] = load i32, ptr @src32, align 2
+; SSE-NEXT:    [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
+; SSE-NEXT:    [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
+; SSE-NEXT:    [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
+; SSE-NEXT:    [[LD4:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
+; SSE-NEXT:    [[LD5:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
+; SSE-NEXT:    [[LD6:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
+; SSE-NEXT:    [[LD7:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
 ; SSE-NEXT:    [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 false)
 ; SSE-NEXT:    [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 false)
 ; SSE-NEXT:    [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 false)
@@ -181,30 +181,30 @@ define void @cttz_8i32() #0 {
 ; SSE-NEXT:    [[CTTZ5:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD5]], i1 false)
 ; SSE-NEXT:    [[CTTZ6:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD6]], i1 false)
 ; SSE-NEXT:    [[CTTZ7:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD7]], i1 false)
-; SSE-NEXT:    store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
-; SSE-NEXT:    store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
-; SSE-NEXT:    store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
-; SSE-NEXT:    store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
-; SSE-NEXT:    store i32 [[CTTZ4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
-; SSE-NEXT:    store i32 [[CTTZ5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
-; SSE-NEXT:    store i32 [[CTTZ6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
-; SSE-NEXT:    store i32 [[CTTZ7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+; SSE-NEXT:    store i32 [[CTTZ0]], ptr @dst32, align 2
+; SSE-NEXT:    store i32 [[CTTZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
+; SSE-NEXT:    store i32 [[CTTZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
+; SSE-NEXT:    store i32 [[CTTZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
+; SSE-NEXT:    store i32 [[CTTZ4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
+; SSE-NEXT:    store i32 [[CTTZ5]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
+; SSE-NEXT:    store i32 [[CTTZ6]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
+; SSE-NEXT:    store i32 [[CTTZ7]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @cttz_8i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
 ; AVX-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> [[TMP1]], i1 false)
-; AVX-NEXT:    store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
+; AVX-NEXT:    store <8 x i32> [[TMP2]], ptr @dst32, align 2
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
-  %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
-  %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
-  %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
-  %ld4 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
-  %ld5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
-  %ld6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
-  %ld7 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+  %ld0 = load i32, ptr @src32, align 2
+  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
+  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
+  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
+  %ld4 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
+  %ld5 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
+  %ld6 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
+  %ld7 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
   %cttz0 = call i32 @llvm.cttz.i32(i32 %ld0, i1 0)
   %cttz1 = call i32 @llvm.cttz.i32(i32 %ld1, i1 0)
   %cttz2 = call i32 @llvm.cttz.i32(i32 %ld2, i1 0)
@@ -213,32 +213,32 @@ define void @cttz_8i32() #0 {
   %cttz5 = call i32 @llvm.cttz.i32(i32 %ld5, i1 0)
   %cttz6 = call i32 @llvm.cttz.i32(i32 %ld6, i1 0)
   %cttz7 = call i32 @llvm.cttz.i32(i32 %ld7, i1 0)
-  store i32 %cttz0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
-  store i32 %cttz1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
-  store i32 %cttz2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
-  store i32 %cttz3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
-  store i32 %cttz4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
-  store i32 %cttz5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
-  store i32 %cttz6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
-  store i32 %cttz7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+  store i32 %cttz0, ptr @dst32, align 2
+  store i32 %cttz1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
+  store i32 %cttz2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
+  store i32 %cttz3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
+  store i32 %cttz4, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
+  store i32 %cttz5, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
+  store i32 %cttz6, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
+  store i32 %cttz7, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
   ret void
 }
 
 define void @cttz_8i16() #0 {
 ; CHECK-LABEL: @cttz_8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> [[TMP1]], i1 false)
-; CHECK-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
-  %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
-  %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
-  %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
-  %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
-  %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
-  %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
-  %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+  %ld0 = load i16, ptr @src16, align 2
+  %ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
+  %ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
+  %ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
+  %ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
+  %ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
+  %ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
+  %ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
   %cttz0 = call i16 @llvm.cttz.i16(i16 %ld0, i1 0)
   %cttz1 = call i16 @llvm.cttz.i16(i16 %ld1, i1 0)
   %cttz2 = call i16 @llvm.cttz.i16(i16 %ld2, i1 0)
@@ -247,49 +247,49 @@ define void @cttz_8i16() #0 {
   %cttz5 = call i16 @llvm.cttz.i16(i16 %ld5, i1 0)
   %cttz6 = call i16 @llvm.cttz.i16(i16 %ld6, i1 0)
   %cttz7 = call i16 @llvm.cttz.i16(i16 %ld7, i1 0)
-  store i16 %cttz0, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
-  store i16 %cttz1, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
-  store i16 %cttz2, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
-  store i16 %cttz3, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
-  store i16 %cttz4, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
-  store i16 %cttz5, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
-  store i16 %cttz6, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
-  store i16 %cttz7, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+  store i16 %cttz0, ptr @dst16, align 2
+  store i16 %cttz1, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
+  store i16 %cttz2, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
+  store i16 %cttz3, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
+  store i16 %cttz4, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
+  store i16 %cttz5, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
+  store i16 %cttz6, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
+  store i16 %cttz7, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
   ret void
 }
 
 define void @cttz_16i16() #0 {
 ; SSE-LABEL: @cttz_16i16(
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
 ; SSE-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> [[TMP1]], i1 false)
-; SSE-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
+; SSE-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
 ; SSE-NEXT:    [[TMP4:%.*]] = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> [[TMP3]], i1 false)
-; SSE-NEXT:    store <8 x i16> [[TMP4]], <8 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP4]], ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @cttz_16i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([16 x i16]* @src16 to <16 x i16>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 2
 ; AVX-NEXT:    [[TMP2:%.*]] = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> [[TMP1]], i1 false)
-; AVX-NEXT:    store <16 x i16> [[TMP2]], <16 x i16>* bitcast ([16 x i16]* @dst16 to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP2]], ptr @dst16, align 2
 ; AVX-NEXT:    ret void
 ;
-  %ld0  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  0), align 2
-  %ld1  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  1), align 2
-  %ld2  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  2), align 2
-  %ld3  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  3), align 2
-  %ld4  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  4), align 2
-  %ld5  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  5), align 2
-  %ld6  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  6), align 2
-  %ld7  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  7), align 2
-  %ld8  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  8), align 2
-  %ld9  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  9), align 2
-  %ld10 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
-  %ld11 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
-  %ld12 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
-  %ld13 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
-  %ld14 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
-  %ld15 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
+  %ld0  = load i16, ptr @src16, align 2
+  %ld1  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  1), align 2
+  %ld2  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  2), align 2
+  %ld3  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  3), align 2
+  %ld4  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  4), align 2
+  %ld5  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  5), align 2
+  %ld6  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  6), align 2
+  %ld7  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  7), align 2
+  %ld8  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  8), align 2
+  %ld9  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  9), align 2
+  %ld10 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 10), align 2
+  %ld11 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 11), align 2
+  %ld12 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 12), align 2
+  %ld13 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 13), align 2
+  %ld14 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 14), align 2
+  %ld15 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 15), align 2
   %cttz0  = call i16 @llvm.cttz.i16(i16 %ld0, i1 0)
   %cttz1  = call i16 @llvm.cttz.i16(i16 %ld1, i1 0)
   %cttz2  = call i16 @llvm.cttz.i16(i16 %ld2, i1 0)
@@ -306,48 +306,48 @@ define void @cttz_16i16() #0 {
   %cttz13 = call i16 @llvm.cttz.i16(i16 %ld13, i1 0)
   %cttz14 = call i16 @llvm.cttz.i16(i16 %ld14, i1 0)
   %cttz15 = call i16 @llvm.cttz.i16(i16 %ld15, i1 0)
-  store i16 %cttz0 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  0), align 2
-  store i16 %cttz1 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  1), align 2
-  store i16 %cttz2 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  2), align 2
-  store i16 %cttz3 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  3), align 2
-  store i16 %cttz4 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  4), align 2
-  store i16 %cttz5 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  5), align 2
-  store i16 %cttz6 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  6), align 2
-  store i16 %cttz7 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  7), align 2
-  store i16 %cttz8 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  8), align 2
-  store i16 %cttz9 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  9), align 2
-  store i16 %cttz10, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
-  store i16 %cttz11, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
-  store i16 %cttz12, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
-  store i16 %cttz13, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
-  store i16 %cttz14, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
-  store i16 %cttz15, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
+  store i16 %cttz0 , ptr @dst16, align 2
+  store i16 %cttz1 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  1), align 2
+  store i16 %cttz2 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  2), align 2
+  store i16 %cttz3 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  3), align 2
+  store i16 %cttz4 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  4), align 2
+  store i16 %cttz5 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  5), align 2
+  store i16 %cttz6 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  6), align 2
+  store i16 %cttz7 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  7), align 2
+  store i16 %cttz8 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  8), align 2
+  store i16 %cttz9 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  9), align 2
+  store i16 %cttz10, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 10), align 2
+  store i16 %cttz11, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 11), align 2
+  store i16 %cttz12, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 12), align 2
+  store i16 %cttz13, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 13), align 2
+  store i16 %cttz14, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 14), align 2
+  store i16 %cttz15, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 15), align 2
   ret void
 }
 
 define void @cttz_16i8() #0 {
 ; CHECK-LABEL: @cttz_16i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([32 x i8]* @src8 to <16 x i8>*), align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> [[TMP1]], i1 false)
-; CHECK-NEXT:    store <16 x i8> [[TMP2]], <16 x i8>* bitcast ([32 x i8]* @dst8 to <16 x i8>*), align 1
+; CHECK-NEXT:    store <16 x i8> [[TMP2]], ptr @dst8, align 1
 ; CHECK-NEXT:    ret void
 ;
-  %ld0  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  0), align 1
-  %ld1  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  1), align 1
-  %ld2  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  2), align 1
-  %ld3  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  3), align 1
-  %ld4  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  4), align 1
-  %ld5  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  5), align 1
-  %ld6  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  6), align 1
-  %ld7  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  7), align 1
-  %ld8  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  8), align 1
-  %ld9  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  9), align 1
-  %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
-  %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
-  %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
-  %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
-  %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
-  %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+  %ld0  = load i8, ptr @src8, align 1
+  %ld1  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  1), align 1
+  %ld2  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  2), align 1
+  %ld3  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  3), align 1
+  %ld4  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  4), align 1
+  %ld5  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  5), align 1
+  %ld6  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  6), align 1
+  %ld7  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  7), align 1
+  %ld8  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  8), align 1
+  %ld9  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  9), align 1
+  %ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
+  %ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
+  %ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
+  %ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
+  %ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
+  %ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
   %cttz0  = call i8 @llvm.cttz.i8(i8 %ld0, i1 0)
   %cttz1  = call i8 @llvm.cttz.i8(i8 %ld1, i1 0)
   %cttz2  = call i8 @llvm.cttz.i8(i8 %ld2, i1 0)
@@ -364,73 +364,73 @@ define void @cttz_16i8() #0 {
   %cttz13 = call i8 @llvm.cttz.i8(i8 %ld13, i1 0)
   %cttz14 = call i8 @llvm.cttz.i8(i8 %ld14, i1 0)
   %cttz15 = call i8 @llvm.cttz.i8(i8 %ld15, i1 0)
-  store i8 %cttz0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  0), align 1
-  store i8 %cttz1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  1), align 1
-  store i8 %cttz2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  2), align 1
-  store i8 %cttz3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  3), align 1
-  store i8 %cttz4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  4), align 1
-  store i8 %cttz5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  5), align 1
-  store i8 %cttz6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  6), align 1
-  store i8 %cttz7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  7), align 1
-  store i8 %cttz8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  8), align 1
-  store i8 %cttz9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  9), align 1
-  store i8 %cttz10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
-  store i8 %cttz11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
-  store i8 %cttz12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
-  store i8 %cttz13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
-  store i8 %cttz14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
-  store i8 %cttz15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+  store i8 %cttz0 , ptr @dst8, align 1
+  store i8 %cttz1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  1), align 1
+  store i8 %cttz2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  2), align 1
+  store i8 %cttz3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  3), align 1
+  store i8 %cttz4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  4), align 1
+  store i8 %cttz5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  5), align 1
+  store i8 %cttz6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  6), align 1
+  store i8 %cttz7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  7), align 1
+  store i8 %cttz8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  8), align 1
+  store i8 %cttz9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  9), align 1
+  store i8 %cttz10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
+  store i8 %cttz11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
+  store i8 %cttz12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
+  store i8 %cttz13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
+  store i8 %cttz14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
+  store i8 %cttz15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
   ret void
 }
 
 define void @cttz_32i8() #0 {
 ; SSE-LABEL: @cttz_32i8(
-; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([32 x i8]* @src8 to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
 ; SSE-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> [[TMP1]], i1 false)
-; SSE-NEXT:    store <16 x i8> [[TMP2]], <16 x i8>* bitcast ([32 x i8]* @dst8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP2]], ptr @dst8, align 1
+; SSE-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
 ; SSE-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> [[TMP3]], i1 false)
-; SSE-NEXT:    store <16 x i8> [[TMP4]], <16 x i8>* bitcast (i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP4]], ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @cttz_32i8(
-; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([32 x i8]* @src8 to <32 x i8>*), align 1
+; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @src8, align 1
 ; AVX-NEXT:    [[TMP2:%.*]] = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> [[TMP1]], i1 false)
-; AVX-NEXT:    store <32 x i8> [[TMP2]], <32 x i8>* bitcast ([32 x i8]* @dst8 to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP2]], ptr @dst8, align 1
 ; AVX-NEXT:    ret void
 ;
-  %ld0  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  0), align 1
-  %ld1  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  1), align 1
-  %ld2  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  2), align 1
-  %ld3  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  3), align 1
-  %ld4  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  4), align 1
-  %ld5  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  5), align 1
-  %ld6  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  6), align 1
-  %ld7  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  7), align 1
-  %ld8  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  8), align 1
-  %ld9  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  9), align 1
-  %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
-  %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
-  %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
-  %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
-  %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
-  %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
-  %ld16 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
-  %ld17 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
-  %ld18 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
-  %ld19 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
-  %ld20 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
-  %ld21 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
-  %ld22 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
-  %ld23 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
-  %ld24 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
-  %ld25 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
-  %ld26 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
-  %ld27 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
-  %ld28 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
-  %ld29 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
-  %ld30 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
-  %ld31 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
+  %ld0  = load i8, ptr @src8, align 1
+  %ld1  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  1), align 1
+  %ld2  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  2), align 1
+  %ld3  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  3), align 1
+  %ld4  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  4), align 1
+  %ld5  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  5), align 1
+  %ld6  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  6), align 1
+  %ld7  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  7), align 1
+  %ld8  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  8), align 1
+  %ld9  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  9), align 1
+  %ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
+  %ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
+  %ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
+  %ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
+  %ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
+  %ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
+  %ld16 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
+  %ld17 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 17), align 1
+  %ld18 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 18), align 1
+  %ld19 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 19), align 1
+  %ld20 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 20), align 1
+  %ld21 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 21), align 1
+  %ld22 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 22), align 1
+  %ld23 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 23), align 1
+  %ld24 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 24), align 1
+  %ld25 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 25), align 1
+  %ld26 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 26), align 1
+  %ld27 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 27), align 1
+  %ld28 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 28), align 1
+  %ld29 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 29), align 1
+  %ld30 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 30), align 1
+  %ld31 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 31), align 1
   %cttz0  = call i8 @llvm.cttz.i8(i8 %ld0, i1 0)
   %cttz1  = call i8 @llvm.cttz.i8(i8 %ld1, i1 0)
   %cttz2  = call i8 @llvm.cttz.i8(i8 %ld2, i1 0)
@@ -463,38 +463,38 @@ define void @cttz_32i8() #0 {
   %cttz29 = call i8 @llvm.cttz.i8(i8 %ld29, i1 0)
   %cttz30 = call i8 @llvm.cttz.i8(i8 %ld30, i1 0)
   %cttz31 = call i8 @llvm.cttz.i8(i8 %ld31, i1 0)
-  store i8 %cttz0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  0), align 1
-  store i8 %cttz1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  1), align 1
-  store i8 %cttz2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  2), align 1
-  store i8 %cttz3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  3), align 1
-  store i8 %cttz4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  4), align 1
-  store i8 %cttz5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  5), align 1
-  store i8 %cttz6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  6), align 1
-  store i8 %cttz7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  7), align 1
-  store i8 %cttz8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  8), align 1
-  store i8 %cttz9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  9), align 1
-  store i8 %cttz10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
-  store i8 %cttz11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
-  store i8 %cttz12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
-  store i8 %cttz13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
-  store i8 %cttz14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
-  store i8 %cttz15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
-  store i8 %cttz16, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
-  store i8 %cttz17, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
-  store i8 %cttz18, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
-  store i8 %cttz19, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
-  store i8 %cttz20, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
-  store i8 %cttz21, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
-  store i8 %cttz22, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
-  store i8 %cttz23, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
-  store i8 %cttz24, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
-  store i8 %cttz25, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
-  store i8 %cttz26, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
-  store i8 %cttz27, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
-  store i8 %cttz28, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
-  store i8 %cttz29, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
-  store i8 %cttz30, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
-  store i8 %cttz31, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
+  store i8 %cttz0 , ptr @dst8, align 1
+  store i8 %cttz1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  1), align 1
+  store i8 %cttz2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  2), align 1
+  store i8 %cttz3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  3), align 1
+  store i8 %cttz4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  4), align 1
+  store i8 %cttz5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  5), align 1
+  store i8 %cttz6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  6), align 1
+  store i8 %cttz7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  7), align 1
+  store i8 %cttz8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  8), align 1
+  store i8 %cttz9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  9), align 1
+  store i8 %cttz10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
+  store i8 %cttz11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
+  store i8 %cttz12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
+  store i8 %cttz13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
+  store i8 %cttz14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
+  store i8 %cttz15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
+  store i8 %cttz16, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
+  store i8 %cttz17, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 17), align 1
+  store i8 %cttz18, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 18), align 1
+  store i8 %cttz19, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 19), align 1
+  store i8 %cttz20, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 20), align 1
+  store i8 %cttz21, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 21), align 1
+  store i8 %cttz22, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 22), align 1
+  store i8 %cttz23, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 23), align 1
+  store i8 %cttz24, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 24), align 1
+  store i8 %cttz25, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 25), align 1
+  store i8 %cttz26, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 26), align 1
+  store i8 %cttz27, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 27), align 1
+  store i8 %cttz28, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 28), align 1
+  store i8 %cttz29, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 29), align 1
+  store i8 %cttz30, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 30), align 1
+  store i8 %cttz31, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 31), align 1
   ret void
 }
 
@@ -504,173 +504,173 @@ define void @cttz_32i8() #0 {
 
 define void @cttz_undef_2i64() #0 {
 ; SSE-LABEL: @cttz_undef_2i64(
-; SSE-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-; SSE-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 8
+; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
 ; SSE-NEXT:    [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true)
 ; SSE-NEXT:    [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true)
-; SSE-NEXT:    store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-; SSE-NEXT:    store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; SSE-NEXT:    store i64 [[CTTZ0]], ptr @dst64, align 8
+; SSE-NEXT:    store i64 [[CTTZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX1-LABEL: @cttz_undef_2i64(
-; AVX1-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-; AVX1-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; AVX1-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 8
+; AVX1-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
 ; AVX1-NEXT:    [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true)
 ; AVX1-NEXT:    [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true)
-; AVX1-NEXT:    store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-; AVX1-NEXT:    store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; AVX1-NEXT:    store i64 [[CTTZ0]], ptr @dst64, align 8
+; AVX1-NEXT:    store i64 [[CTTZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @cttz_undef_2i64(
-; AVX2-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-; AVX2-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; AVX2-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 8
+; AVX2-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
 ; AVX2-NEXT:    [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true)
 ; AVX2-NEXT:    [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true)
-; AVX2-NEXT:    store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-; AVX2-NEXT:    store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; AVX2-NEXT:    store i64 [[CTTZ0]], ptr @dst64, align 8
+; AVX2-NEXT:    store i64 [[CTTZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @cttz_undef_2i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 8
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[TMP1]], i1 true)
-; AVX512-NEXT:    store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 8
+; AVX512-NEXT:    store <2 x i64> [[TMP2]], ptr @dst64, align 8
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
-  %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+  %ld0 = load i64, ptr @src64, align 8
+  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
   %cttz0 = call i64 @llvm.cttz.i64(i64 %ld0, i1 -1)
   %cttz1 = call i64 @llvm.cttz.i64(i64 %ld1, i1 -1)
-  store i64 %cttz0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
-  store i64 %cttz1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+  store i64 %cttz0, ptr @dst64, align 8
+  store i64 %cttz1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @cttz_undef_4i64() #0 {
 ; SSE-LABEL: @cttz_undef_4i64(
-; SSE-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
-; SSE-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
-; SSE-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
-; SSE-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 4
+; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
+; SSE-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
+; SSE-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
 ; SSE-NEXT:    [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true)
 ; SSE-NEXT:    [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true)
 ; SSE-NEXT:    [[CTTZ2:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD2]], i1 true)
 ; SSE-NEXT:    [[CTTZ3:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD3]], i1 true)
-; SSE-NEXT:    store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
-; SSE-NEXT:    store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
-; SSE-NEXT:    store i64 [[CTTZ2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
-; SSE-NEXT:    store i64 [[CTTZ3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+; SSE-NEXT:    store i64 [[CTTZ0]], ptr @dst64, align 4
+; SSE-NEXT:    store i64 [[CTTZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
+; SSE-NEXT:    store i64 [[CTTZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
+; SSE-NEXT:    store i64 [[CTTZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX1-LABEL: @cttz_undef_4i64(
-; AVX1-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
-; AVX1-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
-; AVX1-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
-; AVX1-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+; AVX1-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 4
+; AVX1-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
+; AVX1-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
+; AVX1-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
 ; AVX1-NEXT:    [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true)
 ; AVX1-NEXT:    [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true)
 ; AVX1-NEXT:    [[CTTZ2:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD2]], i1 true)
 ; AVX1-NEXT:    [[CTTZ3:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD3]], i1 true)
-; AVX1-NEXT:    store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
-; AVX1-NEXT:    store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
-; AVX1-NEXT:    store i64 [[CTTZ2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
-; AVX1-NEXT:    store i64 [[CTTZ3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+; AVX1-NEXT:    store i64 [[CTTZ0]], ptr @dst64, align 4
+; AVX1-NEXT:    store i64 [[CTTZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
+; AVX1-NEXT:    store i64 [[CTTZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
+; AVX1-NEXT:    store i64 [[CTTZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @cttz_undef_4i64(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([4 x i64]* @src64 to <4 x i64>*), align 4
+; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 4
 ; AVX2-NEXT:    [[TMP2:%.*]] = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> [[TMP1]], i1 true)
-; AVX2-NEXT:    store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([4 x i64]* @dst64 to <4 x i64>*), align 4
+; AVX2-NEXT:    store <4 x i64> [[TMP2]], ptr @dst64, align 4
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @cttz_undef_4i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([4 x i64]* @src64 to <4 x i64>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 4
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> [[TMP1]], i1 true)
-; AVX512-NEXT:    store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([4 x i64]* @dst64 to <4 x i64>*), align 4
+; AVX512-NEXT:    store <4 x i64> [[TMP2]], ptr @dst64, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
-  %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
-  %ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
-  %ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+  %ld0 = load i64, ptr @src64, align 4
+  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
+  %ld2 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
+  %ld3 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
   %cttz0 = call i64 @llvm.cttz.i64(i64 %ld0, i1 -1)
   %cttz1 = call i64 @llvm.cttz.i64(i64 %ld1, i1 -1)
   %cttz2 = call i64 @llvm.cttz.i64(i64 %ld2, i1 -1)
   %cttz3 = call i64 @llvm.cttz.i64(i64 %ld3, i1 -1)
-  store i64 %cttz0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
-  store i64 %cttz1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
-  store i64 %cttz2, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
-  store i64 %cttz3, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+  store i64 %cttz0, ptr @dst64, align 4
+  store i64 %cttz1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
+  store i64 %cttz2, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
+  store i64 %cttz3, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
   ret void
 }
 
 define void @cttz_undef_4i32() #0 {
 ; SSE-LABEL: @cttz_undef_4i32(
-; SSE-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
-; SSE-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
-; SSE-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
-; SSE-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+; SSE-NEXT:    [[LD0:%.*]] = load i32, ptr @src32, align 4
+; SSE-NEXT:    [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
+; SSE-NEXT:    [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
+; SSE-NEXT:    [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
 ; SSE-NEXT:    [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 true)
 ; SSE-NEXT:    [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 true)
 ; SSE-NEXT:    [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 true)
 ; SSE-NEXT:    [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 true)
-; SSE-NEXT:    store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
-; SSE-NEXT:    store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
-; SSE-NEXT:    store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
-; SSE-NEXT:    store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+; SSE-NEXT:    store i32 [[CTTZ0]], ptr @dst32, align 4
+; SSE-NEXT:    store i32 [[CTTZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
+; SSE-NEXT:    store i32 [[CTTZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
+; SSE-NEXT:    store i32 [[CTTZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX1-LABEL: @cttz_undef_4i32(
-; AVX1-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
-; AVX1-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
-; AVX1-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
-; AVX1-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+; AVX1-NEXT:    [[LD0:%.*]] = load i32, ptr @src32, align 4
+; AVX1-NEXT:    [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
+; AVX1-NEXT:    [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
+; AVX1-NEXT:    [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
 ; AVX1-NEXT:    [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 true)
 ; AVX1-NEXT:    [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 true)
 ; AVX1-NEXT:    [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 true)
 ; AVX1-NEXT:    [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 true)
-; AVX1-NEXT:    store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
-; AVX1-NEXT:    store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
-; AVX1-NEXT:    store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
-; AVX1-NEXT:    store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+; AVX1-NEXT:    store i32 [[CTTZ0]], ptr @dst32, align 4
+; AVX1-NEXT:    store i32 [[CTTZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
+; AVX1-NEXT:    store i32 [[CTTZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
+; AVX1-NEXT:    store i32 [[CTTZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @cttz_undef_4i32(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
+; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
 ; AVX2-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP1]], i1 true)
-; AVX2-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
+; AVX2-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 4
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @cttz_undef_4i32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP1]], i1 true)
-; AVX512-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
+; AVX512-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
-  %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
-  %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
-  %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+  %ld0 = load i32, ptr @src32, align 4
+  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
+  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
   %cttz0 = call i32 @llvm.cttz.i32(i32 %ld0, i1 -1)
   %cttz1 = call i32 @llvm.cttz.i32(i32 %ld1, i1 -1)
   %cttz2 = call i32 @llvm.cttz.i32(i32 %ld2, i1 -1)
   %cttz3 = call i32 @llvm.cttz.i32(i32 %ld3, i1 -1)
-  store i32 %cttz0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
-  store i32 %cttz1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
-  store i32 %cttz2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
-  store i32 %cttz3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+  store i32 %cttz0, ptr @dst32, align 4
+  store i32 %cttz1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
+  store i32 %cttz2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
+  store i32 %cttz3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @cttz_undef_8i32() #0 {
 ; SSE-LABEL: @cttz_undef_8i32(
-; SSE-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
-; SSE-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
-; SSE-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
-; SSE-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
-; SSE-NEXT:    [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
-; SSE-NEXT:    [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
-; SSE-NEXT:    [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
-; SSE-NEXT:    [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+; SSE-NEXT:    [[LD0:%.*]] = load i32, ptr @src32, align 2
+; SSE-NEXT:    [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
+; SSE-NEXT:    [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
+; SSE-NEXT:    [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
+; SSE-NEXT:    [[LD4:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
+; SSE-NEXT:    [[LD5:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
+; SSE-NEXT:    [[LD6:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
+; SSE-NEXT:    [[LD7:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
 ; SSE-NEXT:    [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 true)
 ; SSE-NEXT:    [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 true)
 ; SSE-NEXT:    [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 true)
@@ -679,25 +679,25 @@ define void @cttz_undef_8i32() #0 {
 ; SSE-NEXT:    [[CTTZ5:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD5]], i1 true)
 ; SSE-NEXT:    [[CTTZ6:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD6]], i1 true)
 ; SSE-NEXT:    [[CTTZ7:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD7]], i1 true)
-; SSE-NEXT:    store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
-; SSE-NEXT:    store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
-; SSE-NEXT:    store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
-; SSE-NEXT:    store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
-; SSE-NEXT:    store i32 [[CTTZ4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
-; SSE-NEXT:    store i32 [[CTTZ5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
-; SSE-NEXT:    store i32 [[CTTZ6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
-; SSE-NEXT:    store i32 [[CTTZ7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+; SSE-NEXT:    store i32 [[CTTZ0]], ptr @dst32, align 2
+; SSE-NEXT:    store i32 [[CTTZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
+; SSE-NEXT:    store i32 [[CTTZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
+; SSE-NEXT:    store i32 [[CTTZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
+; SSE-NEXT:    store i32 [[CTTZ4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
+; SSE-NEXT:    store i32 [[CTTZ5]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
+; SSE-NEXT:    store i32 [[CTTZ6]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
+; SSE-NEXT:    store i32 [[CTTZ7]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
 ; SSE-NEXT:    ret void
 ;
 ; AVX1-LABEL: @cttz_undef_8i32(
-; AVX1-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
-; AVX1-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
-; AVX1-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
-; AVX1-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
-; AVX1-NEXT:    [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
-; AVX1-NEXT:    [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
-; AVX1-NEXT:    [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
-; AVX1-NEXT:    [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+; AVX1-NEXT:    [[LD0:%.*]] = load i32, ptr @src32, align 2
+; AVX1-NEXT:    [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
+; AVX1-NEXT:    [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
+; AVX1-NEXT:    [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
+; AVX1-NEXT:    [[LD4:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
+; AVX1-NEXT:    [[LD5:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
+; AVX1-NEXT:    [[LD6:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
+; AVX1-NEXT:    [[LD7:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
 ; AVX1-NEXT:    [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 true)
 ; AVX1-NEXT:    [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 true)
 ; AVX1-NEXT:    [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 true)
@@ -706,36 +706,36 @@ define void @cttz_undef_8i32() #0 {
 ; AVX1-NEXT:    [[CTTZ5:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD5]], i1 true)
 ; AVX1-NEXT:    [[CTTZ6:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD6]], i1 true)
 ; AVX1-NEXT:    [[CTTZ7:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD7]], i1 true)
-; AVX1-NEXT:    store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
-; AVX1-NEXT:    store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
-; AVX1-NEXT:    store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
-; AVX1-NEXT:    store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
-; AVX1-NEXT:    store i32 [[CTTZ4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
-; AVX1-NEXT:    store i32 [[CTTZ5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
-; AVX1-NEXT:    store i32 [[CTTZ6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
-; AVX1-NEXT:    store i32 [[CTTZ7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+; AVX1-NEXT:    store i32 [[CTTZ0]], ptr @dst32, align 2
+; AVX1-NEXT:    store i32 [[CTTZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
+; AVX1-NEXT:    store i32 [[CTTZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
+; AVX1-NEXT:    store i32 [[CTTZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
+; AVX1-NEXT:    store i32 [[CTTZ4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
+; AVX1-NEXT:    store i32 [[CTTZ5]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
+; AVX1-NEXT:    store i32 [[CTTZ6]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
+; AVX1-NEXT:    store i32 [[CTTZ7]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @cttz_undef_8i32(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
+; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
 ; AVX2-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> [[TMP1]], i1 true)
-; AVX2-NEXT:    store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
+; AVX2-NEXT:    store <8 x i32> [[TMP2]], ptr @dst32, align 2
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @cttz_undef_8i32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> [[TMP1]], i1 true)
-; AVX512-NEXT:    store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
+; AVX512-NEXT:    store <8 x i32> [[TMP2]], ptr @dst32, align 2
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
-  %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
-  %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
-  %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
-  %ld4 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
-  %ld5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
-  %ld6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
-  %ld7 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+  %ld0 = load i32, ptr @src32, align 2
+  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
+  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
+  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
+  %ld4 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
+  %ld5 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
+  %ld6 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
+  %ld7 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
   %cttz0 = call i32 @llvm.cttz.i32(i32 %ld0, i1 -1)
   %cttz1 = call i32 @llvm.cttz.i32(i32 %ld1, i1 -1)
   %cttz2 = call i32 @llvm.cttz.i32(i32 %ld2, i1 -1)
@@ -744,32 +744,32 @@ define void @cttz_undef_8i32() #0 {
   %cttz5 = call i32 @llvm.cttz.i32(i32 %ld5, i1 -1)
   %cttz6 = call i32 @llvm.cttz.i32(i32 %ld6, i1 -1)
   %cttz7 = call i32 @llvm.cttz.i32(i32 %ld7, i1 -1)
-  store i32 %cttz0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
-  store i32 %cttz1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
-  store i32 %cttz2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
-  store i32 %cttz3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
-  store i32 %cttz4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
-  store i32 %cttz5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
-  store i32 %cttz6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
-  store i32 %cttz7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+  store i32 %cttz0, ptr @dst32, align 2
+  store i32 %cttz1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
+  store i32 %cttz2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
+  store i32 %cttz3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
+  store i32 %cttz4, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
+  store i32 %cttz5, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
+  store i32 %cttz6, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
+  store i32 %cttz7, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
   ret void
 }
 
 define void @cttz_undef_8i16() #0 {
 ; CHECK-LABEL: @cttz_undef_8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> [[TMP1]], i1 true)
-; CHECK-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
-  %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
-  %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
-  %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
-  %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
-  %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
-  %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
-  %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+  %ld0 = load i16, ptr @src16, align 2
+  %ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
+  %ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
+  %ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
+  %ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
+  %ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
+  %ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
+  %ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
   %cttz0 = call i16 @llvm.cttz.i16(i16 %ld0, i1 -1)
   %cttz1 = call i16 @llvm.cttz.i16(i16 %ld1, i1 -1)
   %cttz2 = call i16 @llvm.cttz.i16(i16 %ld2, i1 -1)
@@ -778,49 +778,49 @@ define void @cttz_undef_8i16() #0 {
   %cttz5 = call i16 @llvm.cttz.i16(i16 %ld5, i1 -1)
   %cttz6 = call i16 @llvm.cttz.i16(i16 %ld6, i1 -1)
   %cttz7 = call i16 @llvm.cttz.i16(i16 %ld7, i1 -1)
-  store i16 %cttz0, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
-  store i16 %cttz1, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
-  store i16 %cttz2, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
-  store i16 %cttz3, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
-  store i16 %cttz4, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
-  store i16 %cttz5, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
-  store i16 %cttz6, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
-  store i16 %cttz7, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+  store i16 %cttz0, ptr @dst16, align 2
+  store i16 %cttz1, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
+  store i16 %cttz2, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
+  store i16 %cttz3, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
+  store i16 %cttz4, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
+  store i16 %cttz5, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
+  store i16 %cttz6, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
+  store i16 %cttz7, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
   ret void
 }
 
 define void @cttz_undef_16i16() #0 {
 ; SSE-LABEL: @cttz_undef_16i16(
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
 ; SSE-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> [[TMP1]], i1 true)
-; SSE-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
+; SSE-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
 ; SSE-NEXT:    [[TMP4:%.*]] = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> [[TMP3]], i1 true)
-; SSE-NEXT:    store <8 x i16> [[TMP4]], <8 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP4]], ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @cttz_undef_16i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([16 x i16]* @src16 to <16 x i16>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 2
 ; AVX-NEXT:    [[TMP2:%.*]] = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> [[TMP1]], i1 true)
-; AVX-NEXT:    store <16 x i16> [[TMP2]], <16 x i16>* bitcast ([16 x i16]* @dst16 to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP2]], ptr @dst16, align 2
 ; AVX-NEXT:    ret void
 ;
-  %ld0  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  0), align 2
-  %ld1  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  1), align 2
-  %ld2  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  2), align 2
-  %ld3  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  3), align 2
-  %ld4  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  4), align 2
-  %ld5  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  5), align 2
-  %ld6  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  6), align 2
-  %ld7  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  7), align 2
-  %ld8  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  8), align 2
-  %ld9  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  9), align 2
-  %ld10 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
-  %ld11 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
-  %ld12 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
-  %ld13 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
-  %ld14 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
-  %ld15 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
+  %ld0  = load i16, ptr @src16, align 2
+  %ld1  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  1), align 2
+  %ld2  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  2), align 2
+  %ld3  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  3), align 2
+  %ld4  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  4), align 2
+  %ld5  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  5), align 2
+  %ld6  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  6), align 2
+  %ld7  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  7), align 2
+  %ld8  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  8), align 2
+  %ld9  = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64  9), align 2
+  %ld10 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 10), align 2
+  %ld11 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 11), align 2
+  %ld12 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 12), align 2
+  %ld13 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 13), align 2
+  %ld14 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 14), align 2
+  %ld15 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 15), align 2
   %cttz0  = call i16 @llvm.cttz.i16(i16 %ld0, i1 -1)
   %cttz1  = call i16 @llvm.cttz.i16(i16 %ld1, i1 -1)
   %cttz2  = call i16 @llvm.cttz.i16(i16 %ld2, i1 -1)
@@ -837,48 +837,48 @@ define void @cttz_undef_16i16() #0 {
   %cttz13 = call i16 @llvm.cttz.i16(i16 %ld13, i1 -1)
   %cttz14 = call i16 @llvm.cttz.i16(i16 %ld14, i1 -1)
   %cttz15 = call i16 @llvm.cttz.i16(i16 %ld15, i1 -1)
-  store i16 %cttz0 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  0), align 2
-  store i16 %cttz1 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  1), align 2
-  store i16 %cttz2 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  2), align 2
-  store i16 %cttz3 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  3), align 2
-  store i16 %cttz4 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  4), align 2
-  store i16 %cttz5 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  5), align 2
-  store i16 %cttz6 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  6), align 2
-  store i16 %cttz7 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  7), align 2
-  store i16 %cttz8 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  8), align 2
-  store i16 %cttz9 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  9), align 2
-  store i16 %cttz10, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
-  store i16 %cttz11, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
-  store i16 %cttz12, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
-  store i16 %cttz13, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
-  store i16 %cttz14, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
-  store i16 %cttz15, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
+  store i16 %cttz0 , ptr @dst16, align 2
+  store i16 %cttz1 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  1), align 2
+  store i16 %cttz2 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  2), align 2
+  store i16 %cttz3 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  3), align 2
+  store i16 %cttz4 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  4), align 2
+  store i16 %cttz5 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  5), align 2
+  store i16 %cttz6 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  6), align 2
+  store i16 %cttz7 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  7), align 2
+  store i16 %cttz8 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  8), align 2
+  store i16 %cttz9 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64  9), align 2
+  store i16 %cttz10, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 10), align 2
+  store i16 %cttz11, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 11), align 2
+  store i16 %cttz12, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 12), align 2
+  store i16 %cttz13, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 13), align 2
+  store i16 %cttz14, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 14), align 2
+  store i16 %cttz15, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 15), align 2
   ret void
 }
 
 define void @cttz_undef_16i8() #0 {
 ; CHECK-LABEL: @cttz_undef_16i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([32 x i8]* @src8 to <16 x i8>*), align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> [[TMP1]], i1 true)
-; CHECK-NEXT:    store <16 x i8> [[TMP2]], <16 x i8>* bitcast ([32 x i8]* @dst8 to <16 x i8>*), align 1
+; CHECK-NEXT:    store <16 x i8> [[TMP2]], ptr @dst8, align 1
 ; CHECK-NEXT:    ret void
 ;
-  %ld0  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  0), align 1
-  %ld1  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  1), align 1
-  %ld2  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  2), align 1
-  %ld3  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  3), align 1
-  %ld4  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  4), align 1
-  %ld5  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  5), align 1
-  %ld6  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  6), align 1
-  %ld7  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  7), align 1
-  %ld8  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  8), align 1
-  %ld9  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  9), align 1
-  %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
-  %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
-  %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
-  %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
-  %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
-  %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+  %ld0  = load i8, ptr @src8, align 1
+  %ld1  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  1), align 1
+  %ld2  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  2), align 1
+  %ld3  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  3), align 1
+  %ld4  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  4), align 1
+  %ld5  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  5), align 1
+  %ld6  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  6), align 1
+  %ld7  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  7), align 1
+  %ld8  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  8), align 1
+  %ld9  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  9), align 1
+  %ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
+  %ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
+  %ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
+  %ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
+  %ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
+  %ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
   %cttz0  = call i8 @llvm.cttz.i8(i8 %ld0, i1 -1)
   %cttz1  = call i8 @llvm.cttz.i8(i8 %ld1, i1 -1)
   %cttz2  = call i8 @llvm.cttz.i8(i8 %ld2, i1 -1)
@@ -895,73 +895,73 @@ define void @cttz_undef_16i8() #0 {
   %cttz13 = call i8 @llvm.cttz.i8(i8 %ld13, i1 -1)
   %cttz14 = call i8 @llvm.cttz.i8(i8 %ld14, i1 -1)
   %cttz15 = call i8 @llvm.cttz.i8(i8 %ld15, i1 -1)
-  store i8 %cttz0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  0), align 1
-  store i8 %cttz1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  1), align 1
-  store i8 %cttz2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  2), align 1
-  store i8 %cttz3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  3), align 1
-  store i8 %cttz4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  4), align 1
-  store i8 %cttz5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  5), align 1
-  store i8 %cttz6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  6), align 1
-  store i8 %cttz7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  7), align 1
-  store i8 %cttz8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  8), align 1
-  store i8 %cttz9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  9), align 1
-  store i8 %cttz10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
-  store i8 %cttz11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
-  store i8 %cttz12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
-  store i8 %cttz13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
-  store i8 %cttz14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
-  store i8 %cttz15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+  store i8 %cttz0 , ptr @dst8, align 1
+  store i8 %cttz1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  1), align 1
+  store i8 %cttz2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  2), align 1
+  store i8 %cttz3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  3), align 1
+  store i8 %cttz4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  4), align 1
+  store i8 %cttz5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  5), align 1
+  store i8 %cttz6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  6), align 1
+  store i8 %cttz7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  7), align 1
+  store i8 %cttz8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  8), align 1
+  store i8 %cttz9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  9), align 1
+  store i8 %cttz10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
+  store i8 %cttz11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
+  store i8 %cttz12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
+  store i8 %cttz13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
+  store i8 %cttz14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
+  store i8 %cttz15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
   ret void
 }
 
 define void @cttz_undef_32i8() #0 {
 ; SSE-LABEL: @cttz_undef_32i8(
-; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([32 x i8]* @src8 to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
 ; SSE-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> [[TMP1]], i1 true)
-; SSE-NEXT:    store <16 x i8> [[TMP2]], <16 x i8>* bitcast ([32 x i8]* @dst8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP2]], ptr @dst8, align 1
+; SSE-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
 ; SSE-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> [[TMP3]], i1 true)
-; SSE-NEXT:    store <16 x i8> [[TMP4]], <16 x i8>* bitcast (i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP4]], ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @cttz_undef_32i8(
-; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([32 x i8]* @src8 to <32 x i8>*), align 1
+; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @src8, align 1
 ; AVX-NEXT:    [[TMP2:%.*]] = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> [[TMP1]], i1 true)
-; AVX-NEXT:    store <32 x i8> [[TMP2]], <32 x i8>* bitcast ([32 x i8]* @dst8 to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP2]], ptr @dst8, align 1
 ; AVX-NEXT:    ret void
 ;
-  %ld0  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  0), align 1
-  %ld1  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  1), align 1
-  %ld2  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  2), align 1
-  %ld3  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  3), align 1
-  %ld4  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  4), align 1
-  %ld5  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  5), align 1
-  %ld6  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  6), align 1
-  %ld7  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  7), align 1
-  %ld8  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  8), align 1
-  %ld9  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  9), align 1
-  %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
-  %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
-  %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
-  %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
-  %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
-  %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
-  %ld16 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
-  %ld17 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
-  %ld18 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
-  %ld19 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
-  %ld20 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
-  %ld21 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
-  %ld22 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
-  %ld23 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
-  %ld24 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
-  %ld25 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
-  %ld26 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
-  %ld27 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
-  %ld28 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
-  %ld29 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
-  %ld30 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
-  %ld31 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
+  %ld0  = load i8, ptr @src8, align 1
+  %ld1  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  1), align 1
+  %ld2  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  2), align 1
+  %ld3  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  3), align 1
+  %ld4  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  4), align 1
+  %ld5  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  5), align 1
+  %ld6  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  6), align 1
+  %ld7  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  7), align 1
+  %ld8  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  8), align 1
+  %ld9  = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64  9), align 1
+  %ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
+  %ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
+  %ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
+  %ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
+  %ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
+  %ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
+  %ld16 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
+  %ld17 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 17), align 1
+  %ld18 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 18), align 1
+  %ld19 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 19), align 1
+  %ld20 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 20), align 1
+  %ld21 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 21), align 1
+  %ld22 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 22), align 1
+  %ld23 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 23), align 1
+  %ld24 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 24), align 1
+  %ld25 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 25), align 1
+  %ld26 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 26), align 1
+  %ld27 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 27), align 1
+  %ld28 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 28), align 1
+  %ld29 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 29), align 1
+  %ld30 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 30), align 1
+  %ld31 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 31), align 1
   %cttz0  = call i8 @llvm.cttz.i8(i8 %ld0, i1 -1)
   %cttz1  = call i8 @llvm.cttz.i8(i8 %ld1, i1 -1)
   %cttz2  = call i8 @llvm.cttz.i8(i8 %ld2, i1 -1)
@@ -994,38 +994,38 @@ define void @cttz_undef_32i8() #0 {
   %cttz29 = call i8 @llvm.cttz.i8(i8 %ld29, i1 -1)
   %cttz30 = call i8 @llvm.cttz.i8(i8 %ld30, i1 -1)
   %cttz31 = call i8 @llvm.cttz.i8(i8 %ld31, i1 -1)
-  store i8 %cttz0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  0), align 1
-  store i8 %cttz1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  1), align 1
-  store i8 %cttz2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  2), align 1
-  store i8 %cttz3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  3), align 1
-  store i8 %cttz4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  4), align 1
-  store i8 %cttz5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  5), align 1
-  store i8 %cttz6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  6), align 1
-  store i8 %cttz7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  7), align 1
-  store i8 %cttz8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  8), align 1
-  store i8 %cttz9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  9), align 1
-  store i8 %cttz10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
-  store i8 %cttz11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
-  store i8 %cttz12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
-  store i8 %cttz13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
-  store i8 %cttz14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
-  store i8 %cttz15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
-  store i8 %cttz16, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
-  store i8 %cttz17, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
-  store i8 %cttz18, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
-  store i8 %cttz19, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
-  store i8 %cttz20, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
-  store i8 %cttz21, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
-  store i8 %cttz22, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
-  store i8 %cttz23, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
-  store i8 %cttz24, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
-  store i8 %cttz25, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
-  store i8 %cttz26, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
-  store i8 %cttz27, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
-  store i8 %cttz28, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
-  store i8 %cttz29, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
-  store i8 %cttz30, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
-  store i8 %cttz31, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
+  store i8 %cttz0 , ptr @dst8, align 1
+  store i8 %cttz1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  1), align 1
+  store i8 %cttz2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  2), align 1
+  store i8 %cttz3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  3), align 1
+  store i8 %cttz4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  4), align 1
+  store i8 %cttz5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  5), align 1
+  store i8 %cttz6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  6), align 1
+  store i8 %cttz7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  7), align 1
+  store i8 %cttz8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  8), align 1
+  store i8 %cttz9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64  9), align 1
+  store i8 %cttz10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
+  store i8 %cttz11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
+  store i8 %cttz12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
+  store i8 %cttz13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
+  store i8 %cttz14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
+  store i8 %cttz15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
+  store i8 %cttz16, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
+  store i8 %cttz17, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 17), align 1
+  store i8 %cttz18, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 18), align 1
+  store i8 %cttz19, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 19), align 1
+  store i8 %cttz20, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 20), align 1
+  store i8 %cttz21, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 21), align 1
+  store i8 %cttz22, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 22), align 1
+  store i8 %cttz23, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 23), align 1
+  store i8 %cttz24, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 24), align 1
+  store i8 %cttz25, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 25), align 1
+  store i8 %cttz26, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 26), align 1
+  store i8 %cttz27, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 27), align 1
+  store i8 %cttz28, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 28), align 1
+  store i8 %cttz29, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 29), align 1
+  store i8 %cttz30, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 30), align 1
+  store i8 %cttz31, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 31), align 1
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cycle_dup.ll b/llvm/test/Transforms/SLPVectorizer/X86/cycle_dup.ll
index f81595148e09c..5774e0657b3d7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cycle_dup.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cycle_dup.ll
@@ -12,13 +12,12 @@ target triple = "x86_64-apple-macosx10.9.0"
 ;   A[0] = r; A[1] = g; A[2] = b; A[3] = a;
 ; }
 
-define i32 @foo(i32* nocapture %A) #0 {
+define i32 @foo(ptr nocapture %A) #0 {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 13
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 13
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
 ; CHECK-NEXT:    [[CMP24:%.*]] = icmp sgt i32 [[TMP2]], 0
 ; CHECK-NEXT:    br i1 [[CMP24]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 ; CHECK:       for.body:
@@ -30,20 +29,19 @@ define i32 @foo(i32* nocapture %A) #0 {
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    [[TMP5:%.*]] = phi <4 x i32> [ [[TMP1]], [[ENTRY]] ], [ [[TMP4]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[A]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[A]], align 4
 ; CHECK-NEXT:    ret i32 undef
 ;
 entry:
-  %0 = load i32, i32* %A, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 1
-  %1 = load i32, i32* %arrayidx1, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 2
-  %2 = load i32, i32* %arrayidx2, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 3
-  %3 = load i32, i32* %arrayidx3, align 4
-  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 13
-  %4 = load i32, i32* %arrayidx4, align 4
+  %0 = load i32, ptr %A, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %A, i64 1
+  %1 = load i32, ptr %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 2
+  %2 = load i32, ptr %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 3
+  %3 = load i32, ptr %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %A, i64 13
+  %4 = load i32, ptr %arrayidx4, align 4
   %cmp24 = icmp sgt i32 %4, 0
   br i1 %cmp24, label %for.body, label %for.end
 
@@ -66,10 +64,10 @@ for.end:                                          ; preds = %for.body, %entry
   %b.0.lcssa = phi i32 [ %2, %entry ], [ %mul6, %for.body ]
   %g.0.lcssa = phi i32 [ %1, %entry ], [ %mul5, %for.body ]
   %r.0.lcssa = phi i32 [ %0, %entry ], [ %mul, %for.body ]
-  store i32 %r.0.lcssa, i32* %A, align 4
-  store i32 %g.0.lcssa, i32* %arrayidx1, align 4
-  store i32 %b.0.lcssa, i32* %arrayidx2, align 4
-  store i32 %a.0.lcssa, i32* %arrayidx3, align 4
+  store i32 %r.0.lcssa, ptr %A, align 4
+  store i32 %g.0.lcssa, ptr %arrayidx1, align 4
+  store i32 %b.0.lcssa, ptr %arrayidx2, align 4
+  store i32 %a.0.lcssa, ptr %arrayidx3, align 4
   ret i32 undef
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll b/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll
index 0d81062e29b8c..8f44574e07b1a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.7.0"
 
-; int depth(double *A, int m) {
+; int depth(ptr A, int m) {
 ;   double y0 = 0; double y1 = 1;
 ;   for (int i=0; i < m; i++) {
 ;     y0 = A[4];
@@ -13,10 +13,10 @@ target triple = "x86_64-apple-macosx10.7.0"
 ;   A[8] = y0; A[8+1] = y1;
 ; }
 
-define i32 @depth(double* nocapture %A, i32 %m) #0 !dbg !4 {
+define i32 @depth(ptr nocapture %A, i32 %m) #0 !dbg !4 {
 ; CHECK-LABEL: @depth(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata double* [[A:%.*]], metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG18:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata ptr [[A:%.*]], metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG18:![0-9]+]]
 ; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[M:%.*]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG18]]
 ; CHECK-NEXT:    call void @llvm.dbg.value(metadata double 0.000000e+00, metadata [[META14:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19:![0-9]+]]
 ; CHECK-NEXT:    call void @llvm.dbg.value(metadata double 2.000000e-01, metadata [[META15:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19]]
@@ -24,19 +24,17 @@ define i32 @depth(double* nocapture %A, i32 %m) #0 !dbg !4 {
 ; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[M]], 0, !dbg [[DBG20]]
 ; CHECK-NEXT:    br i1 [[CMP8]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]], !dbg [[DBG20]]
 ; CHECK:       for.body.lr.ph:
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A]], i64 4, !dbg [[DBG21:![0-9]+]]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*, !dbg [[DBG21]]
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8, !dbg [[DBG21]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A]], i64 4, !dbg [[DBG21:![0-9]+]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 8, !dbg [[DBG21]]
 ; CHECK-NEXT:    br label [[FOR_END]], !dbg [[DBG20]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x double> [ [[TMP1]], [[FOR_BODY_LR_PH]] ], [ <double 0.000000e+00, double 1.000000e+00>, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[A]], i64 8, !dbg [[DBG23:![0-9]+]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[ARRAYIDX2]] to <2 x double>*, !dbg [[DBG23]]
-; CHECK-NEXT:    store <2 x double> [[TMP2]], <2 x double>* [[TMP3]], align 8, !dbg [[DBG23]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[A]], i64 8, !dbg [[DBG23:![0-9]+]]
+; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr [[ARRAYIDX2]], align 8, !dbg [[DBG23]]
 ; CHECK-NEXT:    ret i32 undef, !dbg [[DBG24:![0-9]+]]
 ;
 entry:
-  tail call void @llvm.dbg.value(metadata double* %A, i64 0, metadata !12, metadata !DIExpression()), !dbg !19
+  tail call void @llvm.dbg.value(metadata ptr %A, i64 0, metadata !12, metadata !DIExpression()), !dbg !19
   tail call void @llvm.dbg.value(metadata i32 %m, i64 0, metadata !13, metadata !DIExpression()), !dbg !19
   tail call void @llvm.dbg.value(metadata double 0.0, i64 0, metadata !14, metadata !DIExpression()), !dbg !21
   tail call void @llvm.dbg.value(metadata double 0.2, i64 0, metadata !15, metadata !DIExpression()), !dbg !21
@@ -45,19 +43,19 @@ entry:
   br i1 %cmp8, label %for.body.lr.ph, label %for.end, !dbg !23
 
 for.body.lr.ph:                                   ; preds = %entry
-  %arrayidx = getelementptr inbounds double, double* %A, i64 4, !dbg !24
-  %0 = load double, double* %arrayidx, align 8, !dbg !24
-  %arrayidx1 = getelementptr inbounds double, double* %A, i64 5, !dbg !29
-  %1 = load double, double* %arrayidx1, align 8, !dbg !29
+  %arrayidx = getelementptr inbounds double, ptr %A, i64 4, !dbg !24
+  %0 = load double, ptr %arrayidx, align 8, !dbg !24
+  %arrayidx1 = getelementptr inbounds double, ptr %A, i64 5, !dbg !29
+  %1 = load double, ptr %arrayidx1, align 8, !dbg !29
   br label %for.end, !dbg !23
 
 for.end:                                          ; preds = %for.body.lr.ph, %entry
   %y1.0.lcssa = phi double [ %1, %for.body.lr.ph ], [ 1.000000e+00, %entry ]
   %y0.0.lcssa = phi double [ %0, %for.body.lr.ph ], [ 0.000000e+00, %entry ]
-  %arrayidx2 = getelementptr inbounds double, double* %A, i64 8, !dbg !30
-  store double %y0.0.lcssa, double* %arrayidx2, align 8, !dbg !30
-  %arrayidx3 = getelementptr inbounds double, double* %A, i64 9, !dbg !30
-  store double %y1.0.lcssa, double* %arrayidx3, align 8, !dbg !30
+  %arrayidx2 = getelementptr inbounds double, ptr %A, i64 8, !dbg !30
+  store double %y0.0.lcssa, ptr %arrayidx2, align 8, !dbg !30
+  %arrayidx3 = getelementptr inbounds double, ptr %A, i64 9, !dbg !30
+  store double %y1.0.lcssa, ptr %arrayidx3, align 8, !dbg !30
   ret i32 undef, !dbg !31
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll b/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll
index 92bf76af0e0d6..0937b686ee2f7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll
@@ -12,39 +12,37 @@ target triple = "x86_64-apple-macosx10.8.0"
 ;   return 0;
 ; }
 
-define i32 @foo(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) #0 {
+define i32 @foo(ptr noalias nocapture %B, ptr noalias nocapture %A, i32 %n, i32 %m) #0 {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MUL238:%.*]] = add i32 [[M:%.*]], [[N:%.*]]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[MUL238]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:
-  %0 = load i32, i32* %A, align 4
+  %0 = load i32, ptr %A, align 4
   %mul238 = add i32 %m, %n
   %add = mul i32 %0, %mul238
-  store i32 %add, i32* %B, align 4
-  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 1
-  %1 = load i32, i32* %arrayidx4, align 4
+  store i32 %add, ptr %B, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %A, i64 1
+  %1 = load i32, ptr %arrayidx4, align 4
   %add8 = mul i32 %1, %mul238
-  %arrayidx9 = getelementptr inbounds i32, i32* %B, i64 1
-  store i32 %add8, i32* %arrayidx9, align 4
-  %arrayidx10 = getelementptr inbounds i32, i32* %A, i64 2
-  %2 = load i32, i32* %arrayidx10, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %B, i64 1
+  store i32 %add8, ptr %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds i32, ptr %A, i64 2
+  %2 = load i32, ptr %arrayidx10, align 4
   %add14 = mul i32 %2, %mul238
-  %arrayidx15 = getelementptr inbounds i32, i32* %B, i64 2
-  store i32 %add14, i32* %arrayidx15, align 4
-  %arrayidx16 = getelementptr inbounds i32, i32* %A, i64 3
-  %3 = load i32, i32* %arrayidx16, align 4
+  %arrayidx15 = getelementptr inbounds i32, ptr %B, i64 2
+  store i32 %add14, ptr %arrayidx15, align 4
+  %arrayidx16 = getelementptr inbounds i32, ptr %A, i64 3
+  %3 = load i32, ptr %arrayidx16, align 4
   %add20 = mul i32 %3, %mul238
-  %arrayidx21 = getelementptr inbounds i32, i32* %B, i64 3
-  store i32 %add20, i32* %arrayidx21, align 4
+  %arrayidx21 = getelementptr inbounds i32, ptr %B, i64 3
+  store i32 %add20, ptr %arrayidx21, align 4
   ret i32 0
 }
 
@@ -57,77 +55,73 @@ entry:
 ;   return A[0];
 ; }
 
-define i32 @extr_user(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
+define i32 @extr_user(ptr noalias nocapture %B, ptr noalias nocapture %A, i32 %n, i32 %m) {
 ; CHECK-LABEL: @extr_user(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MUL238:%.*]] = add i32 [[M:%.*]], [[N:%.*]]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[MUL238]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
 ; CHECK-NEXT:    ret i32 [[TMP5]]
 ;
 entry:
-  %0 = load i32, i32* %A, align 4
+  %0 = load i32, ptr %A, align 4
   %mul238 = add i32 %m, %n
   %add = mul i32 %0, %mul238
-  store i32 %add, i32* %B, align 4
-  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 1
-  %1 = load i32, i32* %arrayidx4, align 4
+  store i32 %add, ptr %B, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %A, i64 1
+  %1 = load i32, ptr %arrayidx4, align 4
   %add8 = mul i32 %1, %mul238
-  %arrayidx9 = getelementptr inbounds i32, i32* %B, i64 1
-  store i32 %add8, i32* %arrayidx9, align 4
-  %arrayidx10 = getelementptr inbounds i32, i32* %A, i64 2
-  %2 = load i32, i32* %arrayidx10, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %B, i64 1
+  store i32 %add8, ptr %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds i32, ptr %A, i64 2
+  %2 = load i32, ptr %arrayidx10, align 4
   %add14 = mul i32 %2, %mul238
-  %arrayidx15 = getelementptr inbounds i32, i32* %B, i64 2
-  store i32 %add14, i32* %arrayidx15, align 4
-  %arrayidx16 = getelementptr inbounds i32, i32* %A, i64 3
-  %3 = load i32, i32* %arrayidx16, align 4
+  %arrayidx15 = getelementptr inbounds i32, ptr %B, i64 2
+  store i32 %add14, ptr %arrayidx15, align 4
+  %arrayidx16 = getelementptr inbounds i32, ptr %A, i64 3
+  %3 = load i32, ptr %arrayidx16, align 4
   %add20 = mul i32 %3, %mul238
-  %arrayidx21 = getelementptr inbounds i32, i32* %B, i64 3
-  store i32 %add20, i32* %arrayidx21, align 4
+  %arrayidx21 = getelementptr inbounds i32, ptr %B, i64 3
+  store i32 %add20, ptr %arrayidx21, align 4
   ret i32 %0  ;<--------- This value has multiple users
 }
 
 ; In this example we have an external user that is not the first element in the vector.
-define i32 @extr_user1(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
+define i32 @extr_user1(ptr noalias nocapture %B, ptr noalias nocapture %A, i32 %n, i32 %m) {
 ; CHECK-LABEL: @extr_user1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MUL238:%.*]] = add i32 [[M:%.*]], [[N:%.*]]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[MUL238]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
 ; CHECK-NEXT:    ret i32 [[TMP5]]
 ;
 entry:
-  %0 = load i32, i32* %A, align 4
+  %0 = load i32, ptr %A, align 4
   %mul238 = add i32 %m, %n
   %add = mul i32 %0, %mul238
-  store i32 %add, i32* %B, align 4
-  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 1
-  %1 = load i32, i32* %arrayidx4, align 4
+  store i32 %add, ptr %B, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %A, i64 1
+  %1 = load i32, ptr %arrayidx4, align 4
   %add8 = mul i32 %1, %mul238
-  %arrayidx9 = getelementptr inbounds i32, i32* %B, i64 1
-  store i32 %add8, i32* %arrayidx9, align 4
-  %arrayidx10 = getelementptr inbounds i32, i32* %A, i64 2
-  %2 = load i32, i32* %arrayidx10, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %B, i64 1
+  store i32 %add8, ptr %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds i32, ptr %A, i64 2
+  %2 = load i32, ptr %arrayidx10, align 4
   %add14 = mul i32 %2, %mul238
-  %arrayidx15 = getelementptr inbounds i32, i32* %B, i64 2
-  store i32 %add14, i32* %arrayidx15, align 4
-  %arrayidx16 = getelementptr inbounds i32, i32* %A, i64 3
-  %3 = load i32, i32* %arrayidx16, align 4
+  %arrayidx15 = getelementptr inbounds i32, ptr %B, i64 2
+  store i32 %add14, ptr %arrayidx15, align 4
+  %arrayidx16 = getelementptr inbounds i32, ptr %A, i64 3
+  %3 = load i32, ptr %arrayidx16, align 4
   %add20 = mul i32 %3, %mul238
-  %arrayidx21 = getelementptr inbounds i32, i32* %B, i64 3
-  store i32 %add20, i32* %arrayidx21, align 4
+  %arrayidx21 = getelementptr inbounds i32, ptr %B, i64 3
+  store i32 %add20, ptr %arrayidx21, align 4
   ret i32 %1  ;<--------- This value has multiple users
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast.ll
index 0b48f0fb114bf..60006ccc82517 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast.ll
@@ -1,30 +1,29 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -slp-threshold=-1 | FileCheck %s
 
-define i32 @diamond_broadcast(i32* noalias nocapture %B, i32* noalias nocapture %A) {
+define i32 @diamond_broadcast(ptr noalias nocapture %B, ptr noalias nocapture %A) {
 ; CHECK-LABEL: @diamond_broadcast(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[A:%.*]], align 4
+; CHECK-NEXT:    [[LD:%.*]] = load i32, ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[LD]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:
-  %ld = load i32, i32* %A, align 4
+  %ld = load i32, ptr %A, align 4
   %mul = mul i32 %ld, %ld
-  store i32 %mul, i32* %B, align 4
+  store i32 %mul, ptr %B, align 4
   %mul8 = mul i32 %ld, %ld
-  %arrayidx9 = getelementptr inbounds i32, i32* %B, i64 1
-  store i32 %mul8, i32* %arrayidx9, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %B, i64 1
+  store i32 %mul8, ptr %arrayidx9, align 4
   %mul14 = mul i32 %ld, %ld
-  %arrayidx15 = getelementptr inbounds i32, i32* %B, i64 2
-  store i32 %mul14, i32* %arrayidx15, align 4
+  %arrayidx15 = getelementptr inbounds i32, ptr %B, i64 2
+  store i32 %mul14, ptr %arrayidx15, align 4
   %mul20 = mul i32 %ld, %ld
-  %arrayidx21 = getelementptr inbounds i32, i32* %B, i64 3
-  store i32 %mul20, i32* %arrayidx21, align 4
+  %arrayidx21 = getelementptr inbounds i32, ptr %B, i64 3
+  store i32 %mul20, ptr %arrayidx21, align 4
   ret i32 0
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast_extra_shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast_extra_shuffle.ll
index f4e9f6babf77b..f1c5c7596d97c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast_extra_shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast_extra_shuffle.ll
@@ -1,84 +1,81 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -slp-threshold=-2 | FileCheck %s
 
-define i32 @diamond_broadcast(i32* noalias nocapture %B, i32* noalias nocapture %A) {
+define i32 @diamond_broadcast(ptr noalias nocapture %B, ptr noalias nocapture %A) {
 ; CHECK-LABEL: @diamond_broadcast(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[A:%.*]], align 4
+; CHECK-NEXT:    [[LD:%.*]] = load i32, ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[LD]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:
-  %ld = load i32, i32* %A, align 4
+  %ld = load i32, ptr %A, align 4
   %mul = mul i32 %ld, %ld
-  store i32 %mul, i32* %B, align 4
+  store i32 %mul, ptr %B, align 4
   %mul8 = mul i32 %ld, %ld
-  %arrayidx9 = getelementptr inbounds i32, i32* %B, i64 1
-  store i32 %mul8, i32* %arrayidx9, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %B, i64 1
+  store i32 %mul8, ptr %arrayidx9, align 4
   %mul14 = mul i32 %ld, %ld
-  %arrayidx15 = getelementptr inbounds i32, i32* %B, i64 2
-  store i32 %mul14, i32* %arrayidx15, align 4
+  %arrayidx15 = getelementptr inbounds i32, ptr %B, i64 2
+  store i32 %mul14, ptr %arrayidx15, align 4
   %mul20 = mul i32 %ld, undef
-  %arrayidx21 = getelementptr inbounds i32, i32* %B, i64 3
-  store i32 %mul20, i32* %arrayidx21, align 4
+  %arrayidx21 = getelementptr inbounds i32, ptr %B, i64 3
+  store i32 %mul20, ptr %arrayidx21, align 4
   ret i32 0
 }
 
-define i32 @diamond_broadcast2(i32* noalias nocapture %B, i32* noalias nocapture %A) {
+define i32 @diamond_broadcast2(ptr noalias nocapture %B, ptr noalias nocapture %A) {
 ; CHECK-LABEL: @diamond_broadcast2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[A:%.*]], align 4
+; CHECK-NEXT:    [[LD:%.*]] = load i32, ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[LD]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:
-  %ld = load i32, i32* %A, align 4
+  %ld = load i32, ptr %A, align 4
   %mul = mul i32 %ld, %ld
-  store i32 %mul, i32* %B, align 4
+  store i32 %mul, ptr %B, align 4
   %mul8 = mul i32 %ld, %ld
-  %arrayidx9 = getelementptr inbounds i32, i32* %B, i64 1
-  store i32 %mul8, i32* %arrayidx9, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %B, i64 1
+  store i32 %mul8, ptr %arrayidx9, align 4
   %mul14 = mul i32 %ld, %ld
-  %arrayidx15 = getelementptr inbounds i32, i32* %B, i64 2
-  store i32 %mul14, i32* %arrayidx15, align 4
+  %arrayidx15 = getelementptr inbounds i32, ptr %B, i64 2
+  store i32 %mul14, ptr %arrayidx15, align 4
   %mul20 = mul i32 undef, %ld
-  %arrayidx21 = getelementptr inbounds i32, i32* %B, i64 3
-  store i32 %mul20, i32* %arrayidx21, align 4
+  %arrayidx21 = getelementptr inbounds i32, ptr %B, i64 3
+  store i32 %mul20, ptr %arrayidx21, align 4
   ret i32 0
 }
 
-define i32 @diamond_broadcast3(i32* noalias nocapture %B, i32* noalias nocapture %A) {
+define i32 @diamond_broadcast3(ptr noalias nocapture %B, ptr noalias nocapture %A) {
 ; CHECK-LABEL: @diamond_broadcast3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[A:%.*]], align 4
+; CHECK-NEXT:    [[LD:%.*]] = load i32, ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[LD]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:
-  %ld = load i32, i32* %A, align 4
+  %ld = load i32, ptr %A, align 4
   %mul = mul i32 %ld, %ld
-  store i32 %mul, i32* %B, align 4
+  store i32 %mul, ptr %B, align 4
   %mul8 = mul i32 %ld, %ld
-  %arrayidx9 = getelementptr inbounds i32, i32* %B, i64 1
-  store i32 %mul8, i32* %arrayidx9, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %B, i64 1
+  store i32 %mul8, ptr %arrayidx9, align 4
   %mul14 = mul i32 %ld, undef
-  %arrayidx15 = getelementptr inbounds i32, i32* %B, i64 2
-  store i32 %mul14, i32* %arrayidx15, align 4
+  %arrayidx15 = getelementptr inbounds i32, ptr %B, i64 2
+  store i32 %mul14, ptr %arrayidx15, align 4
   %mul20 = mul i32 undef, %ld
-  %arrayidx21 = getelementptr inbounds i32, i32* %B, i64 3
-  store i32 %mul20, i32* %arrayidx21, align 4
+  %arrayidx21 = getelementptr inbounds i32, ptr %B, i64 3
+  store i32 %mul20, ptr %arrayidx21, align 4
   ret i32 0
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/different-vec-widths.ll b/llvm/test/Transforms/SLPVectorizer/X86/different-vec-widths.ll
index 825d7abe88547..feeb34e2f1145 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/different-vec-widths.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/different-vec-widths.ll
@@ -10,68 +10,52 @@
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-define void @PR28457(double* noalias nocapture align 32 %q, double* noalias nocapture readonly align 32 %p) {
+define void @PR28457(ptr noalias nocapture align 32 %q, ptr noalias nocapture readonly align 32 %p) {
 ; SSE-LABEL: @PR28457(
-; SSE-NEXT:    [[P0:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 0
-; SSE-NEXT:    [[P2:%.*]] = getelementptr inbounds double, double* [[P]], i64 2
-; SSE-NEXT:    [[P4:%.*]] = getelementptr inbounds double, double* [[P]], i64 4
-; SSE-NEXT:    [[Q0:%.*]] = getelementptr inbounds double, double* [[Q:%.*]], i64 0
-; SSE-NEXT:    [[Q2:%.*]] = getelementptr inbounds double, double* [[Q]], i64 2
-; SSE-NEXT:    [[Q4:%.*]] = getelementptr inbounds double, double* [[Q]], i64 4
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast double* [[P0]] to <2 x double>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
+; SSE-NEXT:    [[P2:%.*]] = getelementptr inbounds double, ptr [[P:%.*]], i64 2
+; SSE-NEXT:    [[P4:%.*]] = getelementptr inbounds double, ptr [[P]], i64 4
+; SSE-NEXT:    [[Q2:%.*]] = getelementptr inbounds double, ptr [[Q:%.*]], i64 2
+; SSE-NEXT:    [[Q4:%.*]] = getelementptr inbounds double, ptr [[Q]], i64 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[P]], align 8
 ; SSE-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], <double 1.000000e+00, double 1.000000e+00>
-; SSE-NEXT:    [[TMP4:%.*]] = bitcast double* [[Q0]] to <2 x double>*
-; SSE-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
-; SSE-NEXT:    [[TMP5:%.*]] = bitcast double* [[P2]] to <2 x double>*
-; SSE-NEXT:    [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[TMP5]], align 8
+; SSE-NEXT:    store <2 x double> [[TMP3]], ptr [[Q]], align 8
+; SSE-NEXT:    [[TMP6:%.*]] = load <2 x double>, ptr [[P2]], align 8
 ; SSE-NEXT:    [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], <double 1.000000e+00, double 1.000000e+00>
-; SSE-NEXT:    [[TMP8:%.*]] = bitcast double* [[Q2]] to <2 x double>*
-; SSE-NEXT:    store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8
-; SSE-NEXT:    [[TMP9:%.*]] = bitcast double* [[P4]] to <2 x double>*
-; SSE-NEXT:    [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[TMP9]], align 8
+; SSE-NEXT:    store <2 x double> [[TMP7]], ptr [[Q2]], align 8
+; SSE-NEXT:    [[TMP10:%.*]] = load <2 x double>, ptr [[P4]], align 8
 ; SSE-NEXT:    [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], <double 1.000000e+00, double 1.000000e+00>
-; SSE-NEXT:    [[TMP12:%.*]] = bitcast double* [[Q4]] to <2 x double>*
-; SSE-NEXT:    store <2 x double> [[TMP11]], <2 x double>* [[TMP12]], align 8
+; SSE-NEXT:    store <2 x double> [[TMP11]], ptr [[Q4]], align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @PR28457(
-; AVX-NEXT:    [[P0:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 0
-; AVX-NEXT:    [[P4:%.*]] = getelementptr inbounds double, double* [[P]], i64 4
-; AVX-NEXT:    [[Q0:%.*]] = getelementptr inbounds double, double* [[Q:%.*]], i64 0
-; AVX-NEXT:    [[Q4:%.*]] = getelementptr inbounds double, double* [[Q]], i64 4
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast double* [[P0]] to <4 x double>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x double>, <4 x double>* [[TMP1]], align 8
+; AVX-NEXT:    [[P4:%.*]] = getelementptr inbounds double, ptr [[P:%.*]], i64 4
+; AVX-NEXT:    [[Q4:%.*]] = getelementptr inbounds double, ptr [[Q:%.*]], i64 4
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr [[P]], align 8
 ; AVX-NEXT:    [[TMP3:%.*]] = fadd <4 x double> [[TMP2]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
-; AVX-NEXT:    [[TMP4:%.*]] = bitcast double* [[Q0]] to <4 x double>*
-; AVX-NEXT:    store <4 x double> [[TMP3]], <4 x double>* [[TMP4]], align 8
-; AVX-NEXT:    [[TMP5:%.*]] = bitcast double* [[P4]] to <2 x double>*
-; AVX-NEXT:    [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[TMP5]], align 8
+; AVX-NEXT:    store <4 x double> [[TMP3]], ptr [[Q]], align 8
+; AVX-NEXT:    [[TMP6:%.*]] = load <2 x double>, ptr [[P4]], align 8
 ; AVX-NEXT:    [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], <double 1.000000e+00, double 1.000000e+00>
-; AVX-NEXT:    [[TMP8:%.*]] = bitcast double* [[Q4]] to <2 x double>*
-; AVX-NEXT:    store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8
+; AVX-NEXT:    store <2 x double> [[TMP7]], ptr [[Q4]], align 8
 ; AVX-NEXT:    ret void
 ;
-  %p0 = getelementptr inbounds double, double* %p, i64 0
-  %p1 = getelementptr inbounds double, double* %p, i64 1
-  %p2 = getelementptr inbounds double, double* %p, i64 2
-  %p3 = getelementptr inbounds double, double* %p, i64 3
-  %p4 = getelementptr inbounds double, double* %p, i64 4
-  %p5 = getelementptr inbounds double, double* %p, i64 5
+  %p1 = getelementptr inbounds double, ptr %p, i64 1
+  %p2 = getelementptr inbounds double, ptr %p, i64 2
+  %p3 = getelementptr inbounds double, ptr %p, i64 3
+  %p4 = getelementptr inbounds double, ptr %p, i64 4
+  %p5 = getelementptr inbounds double, ptr %p, i64 5
 
-  %q0 = getelementptr inbounds double, double* %q, i64 0
-  %q1 = getelementptr inbounds double, double* %q, i64 1
-  %q2 = getelementptr inbounds double, double* %q, i64 2
-  %q3 = getelementptr inbounds double, double* %q, i64 3
-  %q4 = getelementptr inbounds double, double* %q, i64 4
-  %q5 = getelementptr inbounds double, double* %q, i64 5
+  %q1 = getelementptr inbounds double, ptr %q, i64 1
+  %q2 = getelementptr inbounds double, ptr %q, i64 2
+  %q3 = getelementptr inbounds double, ptr %q, i64 3
+  %q4 = getelementptr inbounds double, ptr %q, i64 4
+  %q5 = getelementptr inbounds double, ptr %q, i64 5
 
-  %d0 = load double, double* %p0
-  %d1 = load double, double* %p1
-  %d2 = load double, double* %p2
-  %d3 = load double, double* %p3
-  %d4 = load double, double* %p4
-  %d5 = load double, double* %p5
+  %d0 = load double, ptr %p
+  %d1 = load double, ptr %p1
+  %d2 = load double, ptr %p2
+  %d3 = load double, ptr %p3
+  %d4 = load double, ptr %p4
+  %d5 = load double, ptr %p5
 
   %a0 = fadd double %d0, 1.0
   %a1 = fadd double %d1, 1.0
@@ -80,11 +64,11 @@ define void @PR28457(double* noalias nocapture align 32 %q, double* noalias noca
   %a4 = fadd double %d4, 1.0
   %a5 = fadd double %d5, 1.0
 
-  store double %a0, double* %q0
-  store double %a1, double* %q1
-  store double %a2, double* %q2
-  store double %a3, double* %q3
-  store double %a4, double* %q4
-  store double %a5, double* %q5
+  store double %a0, ptr %q
+  store double %a1, ptr %q1
+  store double %a2, ptr %q2
+  store double %a3, ptr %q3
+  store double %a4, ptr %q4
+  store double %a5, ptr %q5
   ret void
 }
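
Besides the bitcast removal, this file shows the zero-offset GEPs
folding away: a getelementptr with index 0 computes the same address as
its base, so %p0/%q0 are dropped and their uses rewritten to %p and %q.
The chosen widths are unchanged: SSE still covers the six doubles as
three <2 x double> operations, AVX as one <4 x double> plus one
<2 x double>. A hypothetical sketch of the fold:

; %p0 = getelementptr inbounds double, ptr %p, i64 0  ; same address as %p
define double @first_elt(ptr %p) {
  %d = load double, ptr %p, align 8   ; loads what %p0 would have pointed at
  ret double %d
}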

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/dot-product.ll b/llvm/test/Transforms/SLPVectorizer/X86/dot-product.ll
index 43ec45040f4e9..af27572cfeaef 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/dot-product.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/dot-product.ll
@@ -5,22 +5,18 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s
 
 ;
-; dot4(float *x, float *y) - ((x[0]*y[0])+(x[1]*y[1])+(x[2]*y[2])+(x[3]*y[3]))
+; dot4(ptr x, ptr y) - ((x[0]*y[0])+(x[1]*y[1])+(x[2]*y[2])+(x[3]*y[3]))
 ;
 
-define double @dot4f64(double* dereferenceable(32) %ptrx, double* dereferenceable(32) %ptry) {
+define double @dot4f64(ptr dereferenceable(32) %ptrx, ptr dereferenceable(32) %ptry) {
 ; CHECK-LABEL: @dot4f64(
-; CHECK-NEXT:    [[PTRX2:%.*]] = getelementptr inbounds double, double* [[PTRX:%.*]], i64 2
-; CHECK-NEXT:    [[PTRY2:%.*]] = getelementptr inbounds double, double* [[PTRY:%.*]], i64 2
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[PTRX]] to <2 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[PTRY]] to <2 x double>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 4
+; CHECK-NEXT:    [[PTRX2:%.*]] = getelementptr inbounds double, ptr [[PTRX:%.*]], i64 2
+; CHECK-NEXT:    [[PTRY2:%.*]] = getelementptr inbounds double, ptr [[PTRY:%.*]], i64 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[PTRX]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, ptr [[PTRY]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[PTRX2]] to <2 x double>*
-; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 4
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast double* [[PTRY2]] to <2 x double>*
-; CHECK-NEXT:    [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[TMP8]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x double>, ptr [[PTRX2]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = load <2 x double>, ptr [[PTRY2]], align 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = fmul <2 x double> [[TMP7]], [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[TMP5]], i32 1
@@ -31,20 +27,20 @@ define double @dot4f64(double* dereferenceable(32) %ptrx, double* dereferenceabl
 ; CHECK-NEXT:    [[DOT0123:%.*]] = fadd double [[DOT012]], [[TMP14]]
 ; CHECK-NEXT:    ret double [[DOT0123]]
 ;
-  %ptrx1 = getelementptr inbounds double, double* %ptrx, i64 1
-  %ptry1 = getelementptr inbounds double, double* %ptry, i64 1
-  %ptrx2 = getelementptr inbounds double, double* %ptrx, i64 2
-  %ptry2 = getelementptr inbounds double, double* %ptry, i64 2
-  %ptrx3 = getelementptr inbounds double, double* %ptrx, i64 3
-  %ptry3 = getelementptr inbounds double, double* %ptry, i64 3
-  %x0 = load double, double* %ptrx, align 4
-  %y0 = load double, double* %ptry, align 4
-  %x1 = load double, double* %ptrx1, align 4
-  %y1 = load double, double* %ptry1, align 4
-  %x2 = load double, double* %ptrx2, align 4
-  %y2 = load double, double* %ptry2, align 4
-  %x3 = load double, double* %ptrx3, align 4
-  %y3 = load double, double* %ptry3, align 4
+  %ptrx1 = getelementptr inbounds double, ptr %ptrx, i64 1
+  %ptry1 = getelementptr inbounds double, ptr %ptry, i64 1
+  %ptrx2 = getelementptr inbounds double, ptr %ptrx, i64 2
+  %ptry2 = getelementptr inbounds double, ptr %ptry, i64 2
+  %ptrx3 = getelementptr inbounds double, ptr %ptrx, i64 3
+  %ptry3 = getelementptr inbounds double, ptr %ptry, i64 3
+  %x0 = load double, ptr %ptrx, align 4
+  %y0 = load double, ptr %ptry, align 4
+  %x1 = load double, ptr %ptrx1, align 4
+  %y1 = load double, ptr %ptry1, align 4
+  %x2 = load double, ptr %ptrx2, align 4
+  %y2 = load double, ptr %ptry2, align 4
+  %x3 = load double, ptr %ptrx3, align 4
+  %y3 = load double, ptr %ptry3, align 4
   %mul0 = fmul double %x0, %y0
   %mul1 = fmul double %x1, %y1
   %mul2 = fmul double %x2, %y2
@@ -55,19 +51,15 @@ define double @dot4f64(double* dereferenceable(32) %ptrx, double* dereferenceabl
   ret double %dot0123
 }
 
-define float @dot4f32(float* dereferenceable(16) %ptrx, float* dereferenceable(16) %ptry) {
+define float @dot4f32(ptr dereferenceable(16) %ptrx, ptr dereferenceable(16) %ptry) {
 ; CHECK-LABEL: @dot4f32(
-; CHECK-NEXT:    [[PTRX2:%.*]] = getelementptr inbounds float, float* [[PTRX:%.*]], i64 2
-; CHECK-NEXT:    [[PTRY2:%.*]] = getelementptr inbounds float, float* [[PTRY:%.*]], i64 2
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[PTRX]] to <2 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[PTRY]] to <2 x float>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[PTRX2:%.*]] = getelementptr inbounds float, ptr [[PTRX:%.*]], i64 2
+; CHECK-NEXT:    [[PTRY2:%.*]] = getelementptr inbounds float, ptr [[PTRY:%.*]], i64 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[PTRX]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, ptr [[PTRY]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x float> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[PTRX2]] to <2 x float>*
-; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[TMP6]], align 4
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast float* [[PTRY2]] to <2 x float>*
-; CHECK-NEXT:    [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[TMP8]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x float>, ptr [[PTRX2]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = load <2 x float>, ptr [[PTRY2]], align 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = fmul <2 x float> [[TMP7]], [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x float> [[TMP5]], i32 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x float> [[TMP5]], i32 1
@@ -78,20 +70,20 @@ define float @dot4f32(float* dereferenceable(16) %ptrx, float* dereferenceable(1
 ; CHECK-NEXT:    [[DOT0123:%.*]] = fadd float [[DOT012]], [[TMP14]]
 ; CHECK-NEXT:    ret float [[DOT0123]]
 ;
-  %ptrx1 = getelementptr inbounds float, float* %ptrx, i64 1
-  %ptry1 = getelementptr inbounds float, float* %ptry, i64 1
-  %ptrx2 = getelementptr inbounds float, float* %ptrx, i64 2
-  %ptry2 = getelementptr inbounds float, float* %ptry, i64 2
-  %ptrx3 = getelementptr inbounds float, float* %ptrx, i64 3
-  %ptry3 = getelementptr inbounds float, float* %ptry, i64 3
-  %x0 = load float, float* %ptrx, align 4
-  %y0 = load float, float* %ptry, align 4
-  %x1 = load float, float* %ptrx1, align 4
-  %y1 = load float, float* %ptry1, align 4
-  %x2 = load float, float* %ptrx2, align 4
-  %y2 = load float, float* %ptry2, align 4
-  %x3 = load float, float* %ptrx3, align 4
-  %y3 = load float, float* %ptry3, align 4
+  %ptrx1 = getelementptr inbounds float, ptr %ptrx, i64 1
+  %ptry1 = getelementptr inbounds float, ptr %ptry, i64 1
+  %ptrx2 = getelementptr inbounds float, ptr %ptrx, i64 2
+  %ptry2 = getelementptr inbounds float, ptr %ptry, i64 2
+  %ptrx3 = getelementptr inbounds float, ptr %ptrx, i64 3
+  %ptry3 = getelementptr inbounds float, ptr %ptry, i64 3
+  %x0 = load float, ptr %ptrx, align 4
+  %y0 = load float, ptr %ptry, align 4
+  %x1 = load float, ptr %ptrx1, align 4
+  %y1 = load float, ptr %ptry1, align 4
+  %x2 = load float, ptr %ptrx2, align 4
+  %y2 = load float, ptr %ptry2, align 4
+  %x3 = load float, ptr %ptrx3, align 4
+  %y3 = load float, ptr %ptry3, align 4
   %mul0 = fmul float %x0, %y0
   %mul1 = fmul float %x1, %y1
   %mul2 = fmul float %x2, %y2
@@ -102,30 +94,28 @@ define float @dot4f32(float* dereferenceable(16) %ptrx, float* dereferenceable(1
   ret float %dot0123
 }
 
-define double @dot4f64_fast(double* dereferenceable(32) %ptrx, double* dereferenceable(32) %ptry) {
+define double @dot4f64_fast(ptr dereferenceable(32) %ptrx, ptr dereferenceable(32) %ptry) {
 ; CHECK-LABEL: @dot4f64_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[PTRX:%.*]] to <4 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x double>, <4 x double>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[PTRY:%.*]] to <4 x double>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x double>, <4 x double>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr [[PTRX:%.*]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x double>, ptr [[PTRY:%.*]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <4 x double> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[TMP5]])
 ; CHECK-NEXT:    ret double [[TMP6]]
 ;
-  %ptrx1 = getelementptr inbounds double, double* %ptrx, i64 1
-  %ptry1 = getelementptr inbounds double, double* %ptry, i64 1
-  %ptrx2 = getelementptr inbounds double, double* %ptrx, i64 2
-  %ptry2 = getelementptr inbounds double, double* %ptry, i64 2
-  %ptrx3 = getelementptr inbounds double, double* %ptrx, i64 3
-  %ptry3 = getelementptr inbounds double, double* %ptry, i64 3
-  %x0 = load double, double* %ptrx, align 4
-  %y0 = load double, double* %ptry, align 4
-  %x1 = load double, double* %ptrx1, align 4
-  %y1 = load double, double* %ptry1, align 4
-  %x2 = load double, double* %ptrx2, align 4
-  %y2 = load double, double* %ptry2, align 4
-  %x3 = load double, double* %ptrx3, align 4
-  %y3 = load double, double* %ptry3, align 4
+  %ptrx1 = getelementptr inbounds double, ptr %ptrx, i64 1
+  %ptry1 = getelementptr inbounds double, ptr %ptry, i64 1
+  %ptrx2 = getelementptr inbounds double, ptr %ptrx, i64 2
+  %ptry2 = getelementptr inbounds double, ptr %ptry, i64 2
+  %ptrx3 = getelementptr inbounds double, ptr %ptrx, i64 3
+  %ptry3 = getelementptr inbounds double, ptr %ptry, i64 3
+  %x0 = load double, ptr %ptrx, align 4
+  %y0 = load double, ptr %ptry, align 4
+  %x1 = load double, ptr %ptrx1, align 4
+  %y1 = load double, ptr %ptry1, align 4
+  %x2 = load double, ptr %ptrx2, align 4
+  %y2 = load double, ptr %ptry2, align 4
+  %x3 = load double, ptr %ptrx3, align 4
+  %y3 = load double, ptr %ptry3, align 4
   %mul0 = fmul double %x0, %y0
   %mul1 = fmul double %x1, %y1
   %mul2 = fmul double %x2, %y2
@@ -136,30 +126,28 @@ define double @dot4f64_fast(double* dereferenceable(32) %ptrx, double* dereferen
   ret double %dot0123
 }
 
-define float @dot4f32_fast(float* dereferenceable(16) %ptrx, float* dereferenceable(16) %ptry) {
+define float @dot4f32_fast(ptr dereferenceable(16) %ptrx, ptr dereferenceable(16) %ptry) {
 ; CHECK-LABEL: @dot4f32_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[PTRX:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[PTRY:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[PTRX:%.*]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, ptr [[PTRY:%.*]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <4 x float> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
 ; CHECK-NEXT:    ret float [[TMP6]]
 ;
-  %ptrx1 = getelementptr inbounds float, float* %ptrx, i64 1
-  %ptry1 = getelementptr inbounds float, float* %ptry, i64 1
-  %ptrx2 = getelementptr inbounds float, float* %ptrx, i64 2
-  %ptry2 = getelementptr inbounds float, float* %ptry, i64 2
-  %ptrx3 = getelementptr inbounds float, float* %ptrx, i64 3
-  %ptry3 = getelementptr inbounds float, float* %ptry, i64 3
-  %x0 = load float, float* %ptrx, align 4
-  %y0 = load float, float* %ptry, align 4
-  %x1 = load float, float* %ptrx1, align 4
-  %y1 = load float, float* %ptry1, align 4
-  %x2 = load float, float* %ptrx2, align 4
-  %y2 = load float, float* %ptry2, align 4
-  %x3 = load float, float* %ptrx3, align 4
-  %y3 = load float, float* %ptry3, align 4
+  %ptrx1 = getelementptr inbounds float, ptr %ptrx, i64 1
+  %ptry1 = getelementptr inbounds float, ptr %ptry, i64 1
+  %ptrx2 = getelementptr inbounds float, ptr %ptrx, i64 2
+  %ptry2 = getelementptr inbounds float, ptr %ptry, i64 2
+  %ptrx3 = getelementptr inbounds float, ptr %ptrx, i64 3
+  %ptry3 = getelementptr inbounds float, ptr %ptry, i64 3
+  %x0 = load float, ptr %ptrx, align 4
+  %y0 = load float, ptr %ptry, align 4
+  %x1 = load float, ptr %ptrx1, align 4
+  %y1 = load float, ptr %ptry1, align 4
+  %x2 = load float, ptr %ptrx2, align 4
+  %y2 = load float, ptr %ptry2, align 4
+  %x3 = load float, ptr %ptrx3, align 4
+  %y3 = load float, ptr %ptry3, align 4
   %mul0 = fmul float %x0, %y0
   %mul1 = fmul float %x1, %y1
   %mul2 = fmul float %x2, %y2
@@ -171,20 +159,18 @@ define float @dot4f32_fast(float* dereferenceable(16) %ptrx, float* dereferencea
 }
 
 ;
-; dot3(float *x, float *y) - ((x[0]*y[0])+(x[1]*y[1])+(x[2]*y[2]))
+; dot3(ptr x, ptr y) - ((x[0]*y[0])+(x[1]*y[1])+(x[2]*y[2]))
 ;
 
-define double @dot3f64(double* dereferenceable(32) %ptrx, double* dereferenceable(32) %ptry) {
+define double @dot3f64(ptr dereferenceable(32) %ptrx, ptr dereferenceable(32) %ptry) {
 ; CHECK-LABEL: @dot3f64(
-; CHECK-NEXT:    [[PTRX1:%.*]] = getelementptr inbounds double, double* [[PTRX:%.*]], i64 1
-; CHECK-NEXT:    [[PTRY1:%.*]] = getelementptr inbounds double, double* [[PTRY:%.*]], i64 1
-; CHECK-NEXT:    [[X0:%.*]] = load double, double* [[PTRX]], align 4
-; CHECK-NEXT:    [[Y0:%.*]] = load double, double* [[PTRY]], align 4
+; CHECK-NEXT:    [[PTRX1:%.*]] = getelementptr inbounds double, ptr [[PTRX:%.*]], i64 1
+; CHECK-NEXT:    [[PTRY1:%.*]] = getelementptr inbounds double, ptr [[PTRY:%.*]], i64 1
+; CHECK-NEXT:    [[X0:%.*]] = load double, ptr [[PTRX]], align 4
+; CHECK-NEXT:    [[Y0:%.*]] = load double, ptr [[PTRY]], align 4
 ; CHECK-NEXT:    [[MUL0:%.*]] = fmul double [[X0]], [[Y0]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[PTRX1]] to <2 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[PTRY1]] to <2 x double>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[PTRX1]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, ptr [[PTRY1]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
 ; CHECK-NEXT:    [[DOT01:%.*]] = fadd double [[MUL0]], [[TMP6]]
@@ -192,16 +178,16 @@ define double @dot3f64(double* dereferenceable(32) %ptrx, double* dereferenceabl
 ; CHECK-NEXT:    [[DOT012:%.*]] = fadd double [[DOT01]], [[TMP7]]
 ; CHECK-NEXT:    ret double [[DOT012]]
 ;
-  %ptrx1 = getelementptr inbounds double, double* %ptrx, i64 1
-  %ptry1 = getelementptr inbounds double, double* %ptry, i64 1
-  %ptrx2 = getelementptr inbounds double, double* %ptrx, i64 2
-  %ptry2 = getelementptr inbounds double, double* %ptry, i64 2
-  %x0 = load double, double* %ptrx, align 4
-  %y0 = load double, double* %ptry, align 4
-  %x1 = load double, double* %ptrx1, align 4
-  %y1 = load double, double* %ptry1, align 4
-  %x2 = load double, double* %ptrx2, align 4
-  %y2 = load double, double* %ptry2, align 4
+  %ptrx1 = getelementptr inbounds double, ptr %ptrx, i64 1
+  %ptry1 = getelementptr inbounds double, ptr %ptry, i64 1
+  %ptrx2 = getelementptr inbounds double, ptr %ptrx, i64 2
+  %ptry2 = getelementptr inbounds double, ptr %ptry, i64 2
+  %x0 = load double, ptr %ptrx, align 4
+  %y0 = load double, ptr %ptry, align 4
+  %x1 = load double, ptr %ptrx1, align 4
+  %y1 = load double, ptr %ptry1, align 4
+  %x2 = load double, ptr %ptrx2, align 4
+  %y2 = load double, ptr %ptry2, align 4
   %mul0 = fmul double %x0, %y0
   %mul1 = fmul double %x1, %y1
   %mul2 = fmul double %x2, %y2
@@ -210,17 +196,15 @@ define double @dot3f64(double* dereferenceable(32) %ptrx, double* dereferenceabl
   ret double %dot012
 }
 
-define float @dot3f32(float* dereferenceable(16) %ptrx, float* dereferenceable(16) %ptry) {
+define float @dot3f32(ptr dereferenceable(16) %ptrx, ptr dereferenceable(16) %ptry) {
 ; CHECK-LABEL: @dot3f32(
-; CHECK-NEXT:    [[PTRX1:%.*]] = getelementptr inbounds float, float* [[PTRX:%.*]], i64 1
-; CHECK-NEXT:    [[PTRY1:%.*]] = getelementptr inbounds float, float* [[PTRY:%.*]], i64 1
-; CHECK-NEXT:    [[X0:%.*]] = load float, float* [[PTRX]], align 4
-; CHECK-NEXT:    [[Y0:%.*]] = load float, float* [[PTRY]], align 4
+; CHECK-NEXT:    [[PTRX1:%.*]] = getelementptr inbounds float, ptr [[PTRX:%.*]], i64 1
+; CHECK-NEXT:    [[PTRY1:%.*]] = getelementptr inbounds float, ptr [[PTRY:%.*]], i64 1
+; CHECK-NEXT:    [[X0:%.*]] = load float, ptr [[PTRX]], align 4
+; CHECK-NEXT:    [[Y0:%.*]] = load float, ptr [[PTRY]], align 4
 ; CHECK-NEXT:    [[MUL0:%.*]] = fmul float [[X0]], [[Y0]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[PTRX1]] to <2 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[PTRY1]] to <2 x float>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[PTRX1]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, ptr [[PTRY1]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x float> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0
 ; CHECK-NEXT:    [[DOT01:%.*]] = fadd float [[MUL0]], [[TMP6]]
@@ -228,16 +212,16 @@ define float @dot3f32(float* dereferenceable(16) %ptrx, float* dereferenceable(1
 ; CHECK-NEXT:    [[DOT012:%.*]] = fadd float [[DOT01]], [[TMP7]]
 ; CHECK-NEXT:    ret float [[DOT012]]
 ;
-  %ptrx1 = getelementptr inbounds float, float* %ptrx, i64 1
-  %ptry1 = getelementptr inbounds float, float* %ptry, i64 1
-  %ptrx2 = getelementptr inbounds float, float* %ptrx, i64 2
-  %ptry2 = getelementptr inbounds float, float* %ptry, i64 2
-  %x0 = load float, float* %ptrx, align 4
-  %y0 = load float, float* %ptry, align 4
-  %x1 = load float, float* %ptrx1, align 4
-  %y1 = load float, float* %ptry1, align 4
-  %x2 = load float, float* %ptrx2, align 4
-  %y2 = load float, float* %ptry2, align 4
+  %ptrx1 = getelementptr inbounds float, ptr %ptrx, i64 1
+  %ptry1 = getelementptr inbounds float, ptr %ptry, i64 1
+  %ptrx2 = getelementptr inbounds float, ptr %ptrx, i64 2
+  %ptry2 = getelementptr inbounds float, ptr %ptry, i64 2
+  %x0 = load float, ptr %ptrx, align 4
+  %y0 = load float, ptr %ptry, align 4
+  %x1 = load float, ptr %ptrx1, align 4
+  %y1 = load float, ptr %ptry1, align 4
+  %x2 = load float, ptr %ptrx2, align 4
+  %y2 = load float, ptr %ptry2, align 4
   %mul0 = fmul float %x0, %y0
   %mul1 = fmul float %x1, %y1
   %mul2 = fmul float %x2, %y2
@@ -246,17 +230,15 @@ define float @dot3f32(float* dereferenceable(16) %ptrx, float* dereferenceable(1
   ret float %dot012
 }
 
-define double @dot3f64_fast(double* dereferenceable(32) %ptrx, double* dereferenceable(32) %ptry) {
+define double @dot3f64_fast(ptr dereferenceable(32) %ptrx, ptr dereferenceable(32) %ptry) {
 ; CHECK-LABEL: @dot3f64_fast(
-; CHECK-NEXT:    [[PTRX1:%.*]] = getelementptr inbounds double, double* [[PTRX:%.*]], i64 1
-; CHECK-NEXT:    [[PTRY1:%.*]] = getelementptr inbounds double, double* [[PTRY:%.*]], i64 1
-; CHECK-NEXT:    [[X0:%.*]] = load double, double* [[PTRX]], align 4
-; CHECK-NEXT:    [[Y0:%.*]] = load double, double* [[PTRY]], align 4
+; CHECK-NEXT:    [[PTRX1:%.*]] = getelementptr inbounds double, ptr [[PTRX:%.*]], i64 1
+; CHECK-NEXT:    [[PTRY1:%.*]] = getelementptr inbounds double, ptr [[PTRY:%.*]], i64 1
+; CHECK-NEXT:    [[X0:%.*]] = load double, ptr [[PTRX]], align 4
+; CHECK-NEXT:    [[Y0:%.*]] = load double, ptr [[PTRY]], align 4
 ; CHECK-NEXT:    [[MUL0:%.*]] = fmul double [[X0]], [[Y0]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[PTRX1]] to <2 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[PTRY1]] to <2 x double>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[PTRX1]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, ptr [[PTRY1]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
 ; CHECK-NEXT:    [[DOT01:%.*]] = fadd fast double [[MUL0]], [[TMP6]]
@@ -264,16 +246,16 @@ define double @dot3f64_fast(double* dereferenceable(32) %ptrx, double* dereferen
 ; CHECK-NEXT:    [[DOT012:%.*]] = fadd fast double [[DOT01]], [[TMP7]]
 ; CHECK-NEXT:    ret double [[DOT012]]
 ;
-  %ptrx1 = getelementptr inbounds double, double* %ptrx, i64 1
-  %ptry1 = getelementptr inbounds double, double* %ptry, i64 1
-  %ptrx2 = getelementptr inbounds double, double* %ptrx, i64 2
-  %ptry2 = getelementptr inbounds double, double* %ptry, i64 2
-  %x0 = load double, double* %ptrx, align 4
-  %y0 = load double, double* %ptry, align 4
-  %x1 = load double, double* %ptrx1, align 4
-  %y1 = load double, double* %ptry1, align 4
-  %x2 = load double, double* %ptrx2, align 4
-  %y2 = load double, double* %ptry2, align 4
+  %ptrx1 = getelementptr inbounds double, ptr %ptrx, i64 1
+  %ptry1 = getelementptr inbounds double, ptr %ptry, i64 1
+  %ptrx2 = getelementptr inbounds double, ptr %ptrx, i64 2
+  %ptry2 = getelementptr inbounds double, ptr %ptry, i64 2
+  %x0 = load double, ptr %ptrx, align 4
+  %y0 = load double, ptr %ptry, align 4
+  %x1 = load double, ptr %ptrx1, align 4
+  %y1 = load double, ptr %ptry1, align 4
+  %x2 = load double, ptr %ptrx2, align 4
+  %y2 = load double, ptr %ptry2, align 4
   %mul0 = fmul double %x0, %y0
   %mul1 = fmul double %x1, %y1
   %mul2 = fmul double %x2, %y2
@@ -282,17 +264,15 @@ define double @dot3f64_fast(double* dereferenceable(32) %ptrx, double* dereferen
   ret double %dot012
 }
 
-define float @dot3f32_fast(float* dereferenceable(16) %ptrx, float* dereferenceable(16) %ptry) {
+define float @dot3f32_fast(ptr dereferenceable(16) %ptrx, ptr dereferenceable(16) %ptry) {
 ; CHECK-LABEL: @dot3f32_fast(
-; CHECK-NEXT:    [[PTRX1:%.*]] = getelementptr inbounds float, float* [[PTRX:%.*]], i64 1
-; CHECK-NEXT:    [[PTRY1:%.*]] = getelementptr inbounds float, float* [[PTRY:%.*]], i64 1
-; CHECK-NEXT:    [[X0:%.*]] = load float, float* [[PTRX]], align 4
-; CHECK-NEXT:    [[Y0:%.*]] = load float, float* [[PTRY]], align 4
+; CHECK-NEXT:    [[PTRX1:%.*]] = getelementptr inbounds float, ptr [[PTRX:%.*]], i64 1
+; CHECK-NEXT:    [[PTRY1:%.*]] = getelementptr inbounds float, ptr [[PTRY:%.*]], i64 1
+; CHECK-NEXT:    [[X0:%.*]] = load float, ptr [[PTRX]], align 4
+; CHECK-NEXT:    [[Y0:%.*]] = load float, ptr [[PTRY]], align 4
 ; CHECK-NEXT:    [[MUL0:%.*]] = fmul float [[X0]], [[Y0]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[PTRX1]] to <2 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[PTRY1]] to <2 x float>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[PTRX1]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, ptr [[PTRY1]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x float> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0
 ; CHECK-NEXT:    [[DOT01:%.*]] = fadd fast float [[MUL0]], [[TMP6]]
@@ -300,16 +280,16 @@ define float @dot3f32_fast(float* dereferenceable(16) %ptrx, float* dereferencea
 ; CHECK-NEXT:    [[DOT012:%.*]] = fadd fast float [[DOT01]], [[TMP7]]
 ; CHECK-NEXT:    ret float [[DOT012]]
 ;
-  %ptrx1 = getelementptr inbounds float, float* %ptrx, i64 1
-  %ptry1 = getelementptr inbounds float, float* %ptry, i64 1
-  %ptrx2 = getelementptr inbounds float, float* %ptrx, i64 2
-  %ptry2 = getelementptr inbounds float, float* %ptry, i64 2
-  %x0 = load float, float* %ptrx, align 4
-  %y0 = load float, float* %ptry, align 4
-  %x1 = load float, float* %ptrx1, align 4
-  %y1 = load float, float* %ptry1, align 4
-  %x2 = load float, float* %ptrx2, align 4
-  %y2 = load float, float* %ptry2, align 4
+  %ptrx1 = getelementptr inbounds float, ptr %ptrx, i64 1
+  %ptry1 = getelementptr inbounds float, ptr %ptry, i64 1
+  %ptrx2 = getelementptr inbounds float, ptr %ptrx, i64 2
+  %ptry2 = getelementptr inbounds float, ptr %ptry, i64 2
+  %x0 = load float, ptr %ptrx, align 4
+  %y0 = load float, ptr %ptry, align 4
+  %x1 = load float, ptr %ptrx1, align 4
+  %y1 = load float, ptr %ptry1, align 4
+  %x2 = load float, ptr %ptrx2, align 4
+  %y2 = load float, ptr %ptry2, align 4
   %mul0 = fmul float %x0, %y0
   %mul1 = fmul float %x1, %y1
   %mul2 = fmul float %x2, %y2
@@ -319,99 +299,91 @@ define float @dot3f32_fast(float* dereferenceable(16) %ptrx, float* dereferencea
 }
 
 ;
-; dot2(float *x, float *y) - ((x[0]*y[0])+(x[1]*y[1]))
+; dot2(ptr x, ptr y) - ((x[0]*y[0])+(x[1]*y[1]))
 ;
 
-define double @dot2f64(double* dereferenceable(16) %ptrx, double* dereferenceable(16) %ptry) {
+define double @dot2f64(ptr dereferenceable(16) %ptrx, ptr dereferenceable(16) %ptry) {
 ; CHECK-LABEL: @dot2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[PTRX:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[PTRY:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[PTRX:%.*]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, ptr [[PTRY:%.*]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i32 1
 ; CHECK-NEXT:    [[DOT01:%.*]] = fadd double [[TMP6]], [[TMP7]]
 ; CHECK-NEXT:    ret double [[DOT01]]
 ;
-  %ptrx1 = getelementptr inbounds double, double* %ptrx, i64 1
-  %ptry1 = getelementptr inbounds double, double* %ptry, i64 1
-  %x0 = load double, double* %ptrx, align 4
-  %y0 = load double, double* %ptry, align 4
-  %x1 = load double, double* %ptrx1, align 4
-  %y1 = load double, double* %ptry1, align 4
+  %ptrx1 = getelementptr inbounds double, ptr %ptrx, i64 1
+  %ptry1 = getelementptr inbounds double, ptr %ptry, i64 1
+  %x0 = load double, ptr %ptrx, align 4
+  %y0 = load double, ptr %ptry, align 4
+  %x1 = load double, ptr %ptrx1, align 4
+  %y1 = load double, ptr %ptry1, align 4
   %mul0 = fmul double %x0, %y0
   %mul1 = fmul double %x1, %y1
   %dot01 = fadd double %mul0, %mul1
   ret double %dot01
 }
 
-define float @dot2f32(float* dereferenceable(16) %ptrx, float* dereferenceable(16) %ptry) {
+define float @dot2f32(ptr dereferenceable(16) %ptrx, ptr dereferenceable(16) %ptry) {
 ; CHECK-LABEL: @dot2f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[PTRX:%.*]] to <2 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[PTRY:%.*]] to <2 x float>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[PTRX:%.*]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, ptr [[PTRY:%.*]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x float> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x float> [[TMP5]], i32 1
 ; CHECK-NEXT:    [[DOT01:%.*]] = fadd float [[TMP6]], [[TMP7]]
 ; CHECK-NEXT:    ret float [[DOT01]]
 ;
-  %ptrx1 = getelementptr inbounds float, float* %ptrx, i64 1
-  %ptry1 = getelementptr inbounds float, float* %ptry, i64 1
-  %x0 = load float, float* %ptrx, align 4
-  %y0 = load float, float* %ptry, align 4
-  %x1 = load float, float* %ptrx1, align 4
-  %y1 = load float, float* %ptry1, align 4
+  %ptrx1 = getelementptr inbounds float, ptr %ptrx, i64 1
+  %ptry1 = getelementptr inbounds float, ptr %ptry, i64 1
+  %x0 = load float, ptr %ptrx, align 4
+  %y0 = load float, ptr %ptry, align 4
+  %x1 = load float, ptr %ptrx1, align 4
+  %y1 = load float, ptr %ptry1, align 4
   %mul0 = fmul float %x0, %y0
   %mul1 = fmul float %x1, %y1
   %dot01 = fadd float %mul0, %mul1
   ret float %dot01
 }
 
-define double @dot2f64_fast(double* dereferenceable(16) %ptrx, double* dereferenceable(16) %ptry) {
+define double @dot2f64_fast(ptr dereferenceable(16) %ptrx, ptr dereferenceable(16) %ptry) {
 ; CHECK-LABEL: @dot2f64_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[PTRX:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[PTRY:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[PTRX:%.*]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, ptr [[PTRY:%.*]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i32 1
 ; CHECK-NEXT:    [[DOT01:%.*]] = fadd fast double [[TMP6]], [[TMP7]]
 ; CHECK-NEXT:    ret double [[DOT01]]
 ;
-  %ptrx1 = getelementptr inbounds double, double* %ptrx, i64 1
-  %ptry1 = getelementptr inbounds double, double* %ptry, i64 1
-  %x0 = load double, double* %ptrx, align 4
-  %y0 = load double, double* %ptry, align 4
-  %x1 = load double, double* %ptrx1, align 4
-  %y1 = load double, double* %ptry1, align 4
+  %ptrx1 = getelementptr inbounds double, ptr %ptrx, i64 1
+  %ptry1 = getelementptr inbounds double, ptr %ptry, i64 1
+  %x0 = load double, ptr %ptrx, align 4
+  %y0 = load double, ptr %ptry, align 4
+  %x1 = load double, ptr %ptrx1, align 4
+  %y1 = load double, ptr %ptry1, align 4
   %mul0 = fmul double %x0, %y0
   %mul1 = fmul double %x1, %y1
   %dot01 = fadd fast double %mul0, %mul1
   ret double %dot01
 }
 
-define float @dot2f32_fast(float* dereferenceable(16) %ptrx, float* dereferenceable(16) %ptry) {
+define float @dot2f32_fast(ptr dereferenceable(16) %ptrx, ptr dereferenceable(16) %ptry) {
 ; CHECK-LABEL: @dot2f32_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[PTRX:%.*]] to <2 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[PTRY:%.*]] to <2 x float>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[PTRX:%.*]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, ptr [[PTRY:%.*]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x float> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x float> [[TMP5]], i32 1
 ; CHECK-NEXT:    [[DOT01:%.*]] = fadd fast float [[TMP6]], [[TMP7]]
 ; CHECK-NEXT:    ret float [[DOT01]]
 ;
-  %ptrx1 = getelementptr inbounds float, float* %ptrx, i64 1
-  %ptry1 = getelementptr inbounds float, float* %ptry, i64 1
-  %x0 = load float, float* %ptrx, align 4
-  %y0 = load float, float* %ptry, align 4
-  %x1 = load float, float* %ptrx1, align 4
-  %y1 = load float, float* %ptry1, align 4
+  %ptrx1 = getelementptr inbounds float, ptr %ptrx, i64 1
+  %ptry1 = getelementptr inbounds float, ptr %ptry, i64 1
+  %x0 = load float, ptr %ptrx, align 4
+  %y0 = load float, ptr %ptry, align 4
+  %x1 = load float, ptr %ptrx1, align 4
+  %y1 = load float, ptr %ptry1, align 4
   %mul0 = fmul float %x0, %y0
   %mul1 = fmul float %x1, %y1
   %dot01 = fadd fast float %mul0, %mul1
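
In the _fast variants, reassociation allows SLP to emit a single
llvm.vector.reduce.fadd instead of the extract/fadd chains used by the
strict versions. A minimal sketch of that reduction shape (hypothetical
function, not from this test):

define double @sum4(<4 x double> %v) {
  ; -0.0 is the neutral start value for an fadd reduction
  %s = call fast double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> %v)
  ret double %s
}
declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)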

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll
index ae3c92a37fea8..597ccd712e0a6 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-; double foo(double * restrict b,  double * restrict a, int n, int m) {
+; double foo(ptr restrict b,  ptr restrict a, int n, int m) {
 ;   double r=a[1];
 ;   double g=a[0];
 ;   double x;
@@ -23,11 +23,10 @@ target triple = "x86_64-apple-macosx10.8.0"
 ;   return x; <-- must extract here!
 ; }
 
-define double @ext_user(double* noalias nocapture %B, double* noalias nocapture %A, i32 %n, i32 %m) {
+define double @ext_user(ptr noalias nocapture %B, ptr noalias nocapture %A, i32 %n, i32 %m) {
 ; CHECK-LABEL: @ext_user(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_020:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
@@ -39,15 +38,14 @@ define double @ext_user(double* noalias nocapture %B, double* noalias nocapture
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 100
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
 ; CHECK-NEXT:    ret double [[TMP7]]
 ;
 entry:
-  %arrayidx = getelementptr inbounds double, double* %A, i64 1
-  %0 = load double, double* %arrayidx, align 8
-  %1 = load double, double* %A, align 8
+  %arrayidx = getelementptr inbounds double, ptr %A, i64 1
+  %0 = load double, ptr %arrayidx, align 8
+  %1 = load double, ptr %A, align 8
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
@@ -65,9 +63,9 @@ for.body:                                         ; preds = %for.body, %entry
   br i1 %exitcond, label %for.end, label %for.body
 
 for.end:                                          ; preds = %for.body
-  store double %add5, double* %B, align 8
-  %arrayidx7 = getelementptr inbounds double, double* %B, i64 1
-  store double %add4, double* %arrayidx7, align 8
+  store double %add5, ptr %B, align 8
+  %arrayidx7 = getelementptr inbounds double, ptr %B, i64 1
+  store double %add4, ptr %arrayidx7, align 8
   ret double %mul3
 }
 
@@ -77,12 +75,12 @@ for.end:                                          ; preds = %for.body
 ; This test would assert because we would keep the scalar fpext and fadd alive.
 ; PR18129
 
-define i32 @needtogather(double *noalias %a, i32 *noalias %b,  float * noalias %c,
+define i32 @needtogather(ptr noalias %a, ptr noalias %b,  ptr noalias %c,
 ; CHECK-LABEL: @needtogather(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[D:%.*]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[D:%.*]], align 4
 ; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
-; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[C:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[SUB:%.*]] = fsub float 0.000000e+00, [[TMP1]]
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[SUB]], 0.000000e+00
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[CONV]], [[MUL]]
@@ -98,16 +96,16 @@ define i32 @needtogather(double *noalias %a, i32 *noalias %b,  float * noalias %
 ; CHECK:       if.end:
 ; CHECK-NEXT:    [[STOREMERGE:%.*]] = phi double [ [[CONV6]], [[IF_THEN]] ], [ [[CONV1]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[E_0:%.*]] = phi double [ [[CONV1]], [[IF_THEN]] ], [ [[CONV6]], [[ENTRY]] ]
-; CHECK-NEXT:    store double [[STOREMERGE]], double* [[A:%.*]], align 8
+; CHECK-NEXT:    store double [[STOREMERGE]], ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    [[CONV7:%.*]] = fptosi double [[E_0]] to i32
-; CHECK-NEXT:    store i32 [[CONV7]], i32* [[B:%.*]], align 4
+; CHECK-NEXT:    store i32 [[CONV7]], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    ret i32 undef
 ;
-  i32 * noalias %d) {
+  ptr noalias %d) {
 entry:
-  %0 = load i32, i32* %d, align 4
+  %0 = load i32, ptr %d, align 4
   %conv = sitofp i32 %0 to float
-  %1 = load float, float* %c
+  %1 = load float, ptr %c
   %sub = fsub float 0.000000e+00, %1
   %mul = fmul float %sub, 0.000000e+00
   %add = fadd float %conv, %mul
@@ -125,8 +123,8 @@ if.then:
 if.end:
   %storemerge = phi double [ %conv6, %if.then ], [ %conv1, %entry ]
   %e.0 = phi double [ %conv1, %if.then ], [ %conv6, %entry ]
-  store double %storemerge, double* %a, align 8
+  store double %storemerge, ptr %a, align 8
   %conv7 = fptosi double %e.0 to i32
-  store i32 %conv7, i32* %b, align 4
+  store i32 %conv7, ptr %b, align 4
   ret i32 undef
 }
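
The ext_user test is about the "must extract here!" line in the C
sketch: %mul3 has a scalar use outside the vectorized tree, so after
forming the <2 x double> operations the vectorizer materializes that
value with an extractelement (TMP7 in the CHECK lines). The general
shape, as a hypothetical sketch:

define double @lane0(<2 x double> %v) {
  %x = extractelement <2 x double> %v, i32 0
  ret double %x
}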

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load-inseltpoison.ll
index c58701999ad4e..ba4c2e6248039 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load-inseltpoison.ll
@@ -3,30 +3,29 @@
 
 @array = external global [20 x [13 x i32]]
 
-define void @hoge(i64 %idx, <4 x i32>* %sink) {
+define void @hoge(i64 %idx, ptr %sink) {
 ; CHECK-LABEL: @hoge(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 [[IDX:%.*]], i64 5
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [20 x [13 x i32]], ptr @array, i64 0, i64 [[IDX:%.*]], i64 5
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
-; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[SINK:%.*]], align 16
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[SINK:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
 bb:
-  %0 = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 %idx, i64 5
-  %1 = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 %idx, i64 6
-  %2 = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 %idx, i64 7
-  %3 = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 %idx, i64 8
-  %4 = load i32, i32* %1, align 4
+  %0 = getelementptr inbounds [20 x [13 x i32]], ptr @array, i64 0, i64 %idx, i64 5
+  %1 = getelementptr inbounds [20 x [13 x i32]], ptr @array, i64 0, i64 %idx, i64 6
+  %2 = getelementptr inbounds [20 x [13 x i32]], ptr @array, i64 0, i64 %idx, i64 7
+  %3 = getelementptr inbounds [20 x [13 x i32]], ptr @array, i64 0, i64 %idx, i64 8
+  %4 = load i32, ptr %1, align 4
   %5 = insertelement <4 x i32> poison, i32 %4, i32 0
-  %6 = load i32, i32* %2, align 4
+  %6 = load i32, ptr %2, align 4
   %7 = insertelement <4 x i32> %5, i32 %6, i32 1
-  %8 = load i32, i32* %3, align 4
+  %8 = load i32, ptr %3, align 4
   %9 = insertelement <4 x i32> %7, i32 %8, i32 2
-  %10 = load i32, i32* %0, align 4
+  %10 = load i32, ptr %0, align 4
   %11 = insertelement <4 x i32> %9, i32 %10, i32 3
-  store <4 x i32> %11, <4 x i32>* %sink
+  store <4 x i32> %11, ptr %sink
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load.ll b/llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load.ll
index 7ccb7e7190223..82e5dfa94f306 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/external_user_jumbled_load.ll
@@ -3,30 +3,29 @@
 
 @array = external global [20 x [13 x i32]]
 
-define void @hoge(i64 %idx, <4 x i32>* %sink) {
+define void @hoge(i64 %idx, ptr %sink) {
 ; CHECK-LABEL: @hoge(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 [[IDX:%.*]], i64 5
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [20 x [13 x i32]], ptr @array, i64 0, i64 [[IDX:%.*]], i64 5
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
-; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[SINK:%.*]], align 16
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[SINK:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
 bb:
-  %0 = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 %idx, i64 5
-  %1 = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 %idx, i64 6
-  %2 = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 %idx, i64 7
-  %3 = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 %idx, i64 8
-  %4 = load i32, i32* %1, align 4
+  %0 = getelementptr inbounds [20 x [13 x i32]], ptr @array, i64 0, i64 %idx, i64 5
+  %1 = getelementptr inbounds [20 x [13 x i32]], ptr @array, i64 0, i64 %idx, i64 6
+  %2 = getelementptr inbounds [20 x [13 x i32]], ptr @array, i64 0, i64 %idx, i64 7
+  %3 = getelementptr inbounds [20 x [13 x i32]], ptr @array, i64 0, i64 %idx, i64 8
+  %4 = load i32, ptr %1, align 4
   %5 = insertelement <4 x i32> undef, i32 %4, i32 0
-  %6 = load i32, i32* %2, align 4
+  %6 = load i32, ptr %2, align 4
   %7 = insertelement <4 x i32> %5, i32 %6, i32 1
-  %8 = load i32, i32* %3, align 4
+  %8 = load i32, ptr %3, align 4
   %9 = insertelement <4 x i32> %7, i32 %8, i32 2
-  %10 = load i32, i32* %0, align 4
+  %10 = load i32, ptr %0, align 4
   %11 = insertelement <4 x i32> %9, i32 %10, i32 3
-  store <4 x i32> %11, <4 x i32>* %sink
+  store <4 x i32> %11, ptr %sink
   ret void
 }
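
This test and the -inseltpoison variant above load lanes 6, 7, 8 and 5
of the array row, which SLP recognizes as one contiguous <4 x i32> load
starting at index 5 followed by a reordering shuffle; the shuffled
vector then replaces the insertelement chain stored to %sink. A
hypothetical sketch of the reorder:

define <4 x i32> @rotate_left1(ptr %p) {
  %v = load <4 x i32>, ptr %p, align 4
  ; [a,b,c,d] -> [b,c,d,a], matching the <i32 1, i32 2, i32 3, i32 0> mask
  %r = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  ret <4 x i32> %r
}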
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract.ll
index 0e7626df8b5f4..7de9f4a3aee7c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extract.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extract.ll
@@ -3,77 +3,68 @@
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
-define void @fextr(double* %ptr) {
+define void @fextr(ptr %ptr) {
 ; CHECK-LABEL: @fextr(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[LD:%.*]] = load <2 x double>, <2 x double>* undef, align 16
-; CHECK-NEXT:    [[P0:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 0
+; CHECK-NEXT:    [[LD:%.*]] = load <2 x double>, ptr undef, align 16
 ; CHECK-NEXT:    [[TMP0:%.*]] = fadd <2 x double> [[LD]], <double 0.000000e+00, double 1.100000e+00>
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[P0]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP0]], <2 x double>* [[TMP1]], align 4
+; CHECK-NEXT:    store <2 x double> [[TMP0]], ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %LD = load <2 x double>, <2 x double>* undef
+  %LD = load <2 x double>, ptr undef
   %V0 = extractelement <2 x double> %LD, i32 0
   %V1 = extractelement <2 x double> %LD, i32 1
-  %P0 = getelementptr inbounds double, double* %ptr, i64 0
-  %P1 = getelementptr inbounds double, double* %ptr, i64 1
+  %P1 = getelementptr inbounds double, ptr %ptr, i64 1
   %A0 = fadd double %V0, 0.0
   %A1 = fadd double %V1, 1.1
-  store double %A0, double* %P0, align 4
-  store double %A1, double* %P1, align 4
+  store double %A0, ptr %ptr, align 4
+  store double %A1, ptr %P1, align 4
   ret void
 }
 
-define void @fextr1(double* %ptr) {
+define void @fextr1(ptr %ptr) {
 ; CHECK-LABEL: @fextr1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[LD:%.*]] = load <2 x double>, <2 x double>* undef, align 16
-; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 0
+; CHECK-NEXT:    [[LD:%.*]] = load <2 x double>, ptr undef, align 16
 ; CHECK-NEXT:    [[TMP0:%.*]] = fadd <2 x double> [[LD]], <double 1.200000e+00, double 3.400000e+00>
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[P1]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[SHUFFLE]], <2 x double>* [[TMP1]], align 4
+; CHECK-NEXT:    store <2 x double> [[SHUFFLE]], ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %LD = load <2 x double>, <2 x double>* undef
+  %LD = load <2 x double>, ptr undef
   %V0 = extractelement <2 x double> %LD, i32 0
   %V1 = extractelement <2 x double> %LD, i32 1
-  %P0 = getelementptr inbounds double, double* %ptr, i64 1  ; <--- incorrect order
-  %P1 = getelementptr inbounds double, double* %ptr, i64 0
+  %P0 = getelementptr inbounds double, ptr %ptr, i64 1  ; <--- incorrect order
   %A0 = fadd double %V0, 1.2
   %A1 = fadd double %V1, 3.4
-  store double %A0, double* %P0, align 4
-  store double %A1, double* %P1, align 4
+  store double %A0, ptr %P0, align 4
+  store double %A1, ptr %ptr, align 4
   ret void
 }
 
-define void @fextr2(double* %ptr) {
+define void @fextr2(ptr %ptr) {
 ; CHECK-LABEL: @fextr2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[LD:%.*]] = load <4 x double>, <4 x double>* undef, align 32
+; CHECK-NEXT:    [[LD:%.*]] = load <4 x double>, ptr undef, align 32
 ; CHECK-NEXT:    [[V0:%.*]] = extractelement <4 x double> [[LD]], i32 0
 ; CHECK-NEXT:    [[V1:%.*]] = extractelement <4 x double> [[LD]], i32 1
-; CHECK-NEXT:    [[P0:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V0]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V1]], i32 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], <double 5.500000e+00, double 6.600000e+00>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[P0]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP2]], <2 x double>* [[TMP3]], align 4
+; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %LD = load <4 x double>, <4 x double>* undef
+  %LD = load <4 x double>, ptr undef
   %V0 = extractelement <4 x double> %LD, i32 0  ; <--- invalid size.
   %V1 = extractelement <4 x double> %LD, i32 1
-  %P0 = getelementptr inbounds double, double* %ptr, i64 0
-  %P1 = getelementptr inbounds double, double* %ptr, i64 1
+  %P1 = getelementptr inbounds double, ptr %ptr, i64 1
   %A0 = fadd double %V0, 5.5
   %A1 = fadd double %V1, 6.6
-  store double %A0, double* %P0, align 4
-  store double %A1, double* %P1, align 4
+  store double %A0, ptr %ptr, align 4
+  store double %A1, ptr %P1, align 4
   ret void
 }
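
The three fextr tests cover reuse of values that already live in a
vector: fextr feeds the extracted lanes straight into a vectorized
fadd/store, fextr1 handles the reversed store order with a
shufflevector, and fextr2 re-packs two lanes of a wider source vector
through insertelement before the <2 x double> store. A hypothetical
sketch of that re-pack:

define void @repack(<4 x double> %src, ptr %dst) {
  %v0 = extractelement <4 x double> %src, i32 0
  %v1 = extractelement <4 x double> %src, i32 1
  %t0 = insertelement <2 x double> poison, double %v0, i32 0
  %t1 = insertelement <2 x double> %t0, double %v1, i32 1
  store <2 x double> %t1, ptr %dst, align 8
  ret void
}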
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
index 0c4ff04209f04..be6b0bc47c025 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
@@ -3,88 +3,84 @@
 
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
-@a = common global i64* null, align 8
+@a = common global ptr null, align 8
 
 ; Function Attrs: nounwind ssp uwtable
 define i32 @fn1() {
 ; CHECK-LABEL: @fn1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i64*, i64** @a, align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64*> poison, i64* [[TMP0]], i32 0
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i64*> [[TMP1]], <2 x i64*> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i64, <2 x i64*> [[SHUFFLE]], <2 x i64> <i64 11, i64 56>
-; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint <2 x i64*> [[TMP2]] to <2 x i64>
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64*> [[TMP2]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr @a, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP0]], i32 0
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i64, <2 x ptr> [[SHUFFLE]], <2 x i64> <i64 11, i64 56>
+; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint <2 x ptr> [[TMP2]] to <2 x i64>
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP2]], i32 0
+; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[TMP4]], align 8
 ; CHECK-NEXT:    ret i32 undef
 ;
 entry:
-  %0 = load i64*, i64** @a, align 8
-  %add.ptr = getelementptr inbounds i64, i64* %0, i64 11
-  %1 = ptrtoint i64* %add.ptr to i64
-  store i64 %1, i64* %add.ptr, align 8
-  %add.ptr1 = getelementptr inbounds i64, i64* %0, i64 56
-  %2 = ptrtoint i64* %add.ptr1 to i64
-  %arrayidx2 = getelementptr inbounds i64, i64* %0, i64 12
-  store i64 %2, i64* %arrayidx2, align 8
+  %0 = load ptr, ptr @a, align 8
+  %add.ptr = getelementptr inbounds i64, ptr %0, i64 11
+  %1 = ptrtoint ptr %add.ptr to i64
+  store i64 %1, ptr %add.ptr, align 8
+  %add.ptr1 = getelementptr inbounds i64, ptr %0, i64 56
+  %2 = ptrtoint ptr %add.ptr1 to i64
+  %arrayidx2 = getelementptr inbounds i64, ptr %0, i64 12
+  store i64 %2, ptr %arrayidx2, align 8
   ret i32 undef
 }
 
 declare float @llvm.powi.f32.i32(float, i32)
-define void @fn2(i32* %a, i32* %b, float* %c) {
+define void @fn2(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @fn2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = sitofp <4 x i32> [[TMP4]] to <4 x float>
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[TMP5]], i32 [[TMP6]])
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast float* [[C:%.*]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP7]], <4 x float>* [[TMP8]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP7]], ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %i0 = load i32, i32* %a, align 4
-  %i1 = load i32, i32* %b, align 4
+  %i0 = load i32, ptr %a, align 4
+  %i1 = load i32, ptr %b, align 4
   %add1 = add i32 %i0, %i1
   %fp1 = sitofp i32 %add1 to float
   %call1 = tail call float @llvm.powi.f32.i32(float %fp1,i32 %add1) nounwind readnone
 
-  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1
-  %i2 = load i32, i32* %arrayidx2, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1
-  %i3 = load i32, i32* %arrayidx3, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i32 1
+  %i2 = load i32, ptr %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %b, i32 1
+  %i3 = load i32, ptr %arrayidx3, align 4
   %add2 = add i32 %i2, %i3
   %fp2 = sitofp i32 %add2 to float
   %call2 = tail call float @llvm.powi.f32.i32(float %fp2,i32 %add1) nounwind readnone
 
-  %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2
-  %i4 = load i32, i32* %arrayidx4, align 4
-  %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2
-  %i5 = load i32, i32* %arrayidx5, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %a, i32 2
+  %i4 = load i32, ptr %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %b, i32 2
+  %i5 = load i32, ptr %arrayidx5, align 4
   %add3 = add i32 %i4, %i5
   %fp3 = sitofp i32 %add3 to float
   %call3 = tail call float @llvm.powi.f32.i32(float %fp3,i32 %add1) nounwind readnone
 
-  %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3
-  %i6 = load i32, i32* %arrayidx6, align 4
-  %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3
-  %i7 = load i32, i32* %arrayidx7, align 4
+  %arrayidx6 = getelementptr inbounds i32, ptr %a, i32 3
+  %i6 = load i32, ptr %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds i32, ptr %b, i32 3
+  %i7 = load i32, ptr %arrayidx7, align 4
   %add4 = add i32 %i6, %i7
   %fp4 = sitofp i32 %add4 to float
   %call4 = tail call float @llvm.powi.f32.i32(float %fp4,i32 %add1) nounwind readnone
 
-  store float %call1, float* %c, align 4
-  %arrayidx8 = getelementptr inbounds float, float* %c, i32 1
-  store float %call2, float* %arrayidx8, align 4
-  %arrayidx9 = getelementptr inbounds float, float* %c, i32 2
-  store float %call3, float* %arrayidx9, align 4
-  %arrayidx10 = getelementptr inbounds float, float* %c, i32 3
-  store float %call4, float* %arrayidx10, align 4
+  store float %call1, ptr %c, align 4
+  %arrayidx8 = getelementptr inbounds float, ptr %c, i32 1
+  store float %call2, ptr %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds float, ptr %c, i32 2
+  store float %call3, ptr %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds float, ptr %c, i32 3
+  store float %call4, ptr %arrayidx10, align 4
   ret void
 
 }
@@ -92,31 +88,29 @@ entry:
 define void @externally_used_ptrs() {
 ; CHECK-LABEL: @externally_used_ptrs(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i64*, i64** @a, align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64*> poison, i64* [[TMP0]], i32 0
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i64*> [[TMP1]], <2 x i64*> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i64, <2 x i64*> [[SHUFFLE]], <2 x i64> <i64 56, i64 11>
-; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint <2 x i64*> [[TMP2]] to <2 x i64>
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64*> [[TMP2]], i32 1
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr @a, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP0]], i32 0
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i64, <2 x ptr> [[SHUFFLE]], <2 x i64> <i64 56, i64 11>
+; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint <2 x ptr> [[TMP2]] to <2 x i64>
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP2]], i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
 ; CHECK-NEXT:    [[TMP7:%.*]] = add <2 x i64> [[TMP3]], [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64* [[TMP4]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP7]], <2 x i64>* [[TMP8]], align 8
+; CHECK-NEXT:    store <2 x i64> [[TMP7]], ptr [[TMP4]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load i64*, i64** @a, align 8
-  %add.ptr = getelementptr inbounds i64, i64* %0, i64 11
-  %1 = ptrtoint i64* %add.ptr to i64
-  %add.ptr1 = getelementptr inbounds i64, i64* %0, i64 56
-  %2 = ptrtoint i64* %add.ptr1 to i64
-  %arrayidx2 = getelementptr inbounds i64, i64* %0, i64 12
-  %3 = load i64, i64* %arrayidx2, align 8
-  %4 = load i64, i64* %add.ptr, align 8
+  %0 = load ptr, ptr @a, align 8
+  %add.ptr = getelementptr inbounds i64, ptr %0, i64 11
+  %1 = ptrtoint ptr %add.ptr to i64
+  %add.ptr1 = getelementptr inbounds i64, ptr %0, i64 56
+  %2 = ptrtoint ptr %add.ptr1 to i64
+  %arrayidx2 = getelementptr inbounds i64, ptr %0, i64 12
+  %3 = load i64, ptr %arrayidx2, align 8
+  %4 = load i64, ptr %add.ptr, align 8
   %5 = add i64 %1, %3
   %6 = add i64 %2, %4
-  store i64 %6, i64* %add.ptr, align 8
-  store i64 %5, i64* %arrayidx2, align 8
+  store i64 %6, ptr %add.ptr, align 8
+  store i64 %5, ptr %arrayidx2, align 8
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract_with_non_const_index.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract_with_non_const_index.ll
index 5289514176710..0d60de7935032 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extract_with_non_const_index.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extract_with_non_const_index.ll
@@ -4,13 +4,11 @@
 ; Reproducer for an issue discussed here:
 ; https://reviews.llvm.org/D108703#2974289
 
-define void @test([4 x float]* nocapture %o, [2 x float]* nocapture nonnull readonly dereferenceable(8) %a, [2 x float]* nocapture nonnull readonly dereferenceable(8) %b, i32 signext %component) {
+define void @test(ptr nocapture %o, ptr nocapture nonnull readonly dereferenceable(8) %a, ptr nocapture nonnull readonly dereferenceable(8) %b, i32 signext %component) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast [2 x float]* [[A:%.*]] to <2 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 1
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [2 x float]* [[B:%.*]] to <2 x float>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[A:%.*]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, ptr [[B:%.*]], align 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[COMPONENT:%.*]] to i8
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 2, i32 3, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <8 x float> [[TMP5]], i8 [[TMP4]]
@@ -20,25 +18,21 @@ define void @test([4 x float]* nocapture %o, [2 x float]* nocapture nonnull read
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
 ; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP10]], i64 2
 ; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP6]], i64 3
-; CHECK-NEXT:    [[TMP13:%.*]] = bitcast [4 x float]* [[O:%.*]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP12]], <4 x float>* [[TMP13]], align 1
+; CHECK-NEXT:    store <4 x float> [[TMP12]], ptr [[O:%.*]], align 1
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = bitcast [2 x float]* %a to <2 x float>*
-  %1 = load <2 x float>, <2 x float>* %0, align 1
-  %2 = bitcast [2 x float]* %b to <2 x float>*
-  %3 = load <2 x float>, <2 x float>* %2, align 1
-  %4 = trunc i32 %component to i8
-  %5 = shufflevector <2 x float> %1, <2 x float> %3, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 2, i32 3, i32 undef, i32 undef>
-  %6 = extractelement <8 x float> %5, i8 %4
-  %7 = insertelement <4 x float> undef, float %6, i64 0
-  %8 = extractelement <2 x float> %3, i32 1
-  %9 = insertelement <4 x float> %7, float %8, i64 1
-  %10 = extractelement <2 x float> %1, i32 1
-  %11 = insertelement <4 x float> %9, float %10, i64 2
-  %12 = insertelement <4 x float> %11, float %6, i64 3
-  %13 = bitcast [4 x float]* %o to <4 x float>*
-  store <4 x float> %12, <4 x float>* %13, align 1
+  %0 = load <2 x float>, ptr %a, align 1
+  %1 = load <2 x float>, ptr %b, align 1
+  %2 = trunc i32 %component to i8
+  %3 = shufflevector <2 x float> %0, <2 x float> %1, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 2, i32 3, i32 undef, i32 undef>
+  %4 = extractelement <8 x float> %3, i8 %2
+  %5 = insertelement <4 x float> undef, float %4, i64 0
+  %6 = extractelement <2 x float> %1, i32 1
+  %7 = insertelement <4 x float> %5, float %6, i64 1
+  %8 = extractelement <2 x float> %0, i32 1
+  %9 = insertelement <4 x float> %7, float %8, i64 2
+  %10 = insertelement <4 x float> %9, float %4, i64 3
+  store <4 x float> %10, ptr %o, align 1
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll
index 5d83491b0c5da..1374e9873e1c5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-define i32 @foo(i32* nocapture %A, i32 %n, i32 %m) {
+define i32 @foo(ptr nocapture %A, i32 %n, i32 %m) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
@@ -13,8 +13,7 @@ define i32 @foo(i32* nocapture %A, i32 %n, i32 %m) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], <i32 9, i32 9, i32 9, i32 9>
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
 ; CHECK-NEXT:    [[EXTERNALUSE1:%.*]] = add nsw i32 [[TMP6]], [[M:%.*]]
 ; CHECK-NEXT:    [[EXTERNALUSE2:%.*]] = mul nsw i32 [[TMP6]], [[M]]
@@ -24,19 +23,19 @@ define i32 @foo(i32* nocapture %A, i32 %n, i32 %m) {
 entry:
   %mul = mul nsw i32 %n, 5
   %add = add nsw i32 %mul, 9
-  store i32 %add, i32* %A, align 4
+  store i32 %add, ptr %A, align 4
   %mul1 = mul nsw i32 %n, 9
   %add2 = add nsw i32 %mul1, 9
-  %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 1
-  store i32 %add2, i32* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 1
+  store i32 %add2, ptr %arrayidx3, align 4
   %mul4 = shl i32 %n, 3
   %add5 = add nsw i32 %mul4, 9
-  %arrayidx6 = getelementptr inbounds i32, i32* %A, i64 2
-  store i32 %add5, i32* %arrayidx6, align 4
+  %arrayidx6 = getelementptr inbounds i32, ptr %A, i64 2
+  store i32 %add5, ptr %arrayidx6, align 4
   %mul7 = mul nsw i32 %n, 10
   %add8 = add nsw i32 %mul7, 9
-  %arrayidx9 = getelementptr inbounds i32, i32* %A, i64 3
-  store i32 %add8, i32* %arrayidx9, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %A, i64 3
+  store i32 %add8, ptr %arrayidx9, align 4
   %externaluse1 = add nsw i32 %add, %m
   %externaluse2 = mul nsw i32 %add, %m  ; we should add the extract cost only once and the store will be vectorized
   %add10 = add nsw i32 %externaluse1, %externaluse2

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-insertpoint.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-insertpoint.ll
index 46bcd33692dff..8c51a907998a4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-insertpoint.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-insertpoint.ll
@@ -12,7 +12,7 @@ define i32 @crash() {
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
 ; CHECK-NEXT:    [[ADD1:%.*]] = fadd double [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    [[MUL1:%.*]] = fmul double [[ADD1]], 0.000000e+00
-; CHECK-NEXT:    store double [[MUL1]], double* null, align 16
+; CHECK-NEXT:    store double [[MUL1]], ptr null, align 16
 ; CHECK-NEXT:    ret i32 0
 ;
 label:
@@ -26,6 +26,6 @@ label:
   %mul = fmul double %extract0, %0
   %add1 = fadd double %2, %mul
   %mul1 = fmul double %add1, 0.000000e+00
-  store double %mul1, double* null, align 16
+  store double %mul1, ptr null, align 16
   ret i32 0
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll
index e24d3a6c6776e..6bff6d9b45a3d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll
@@ -42,7 +42,7 @@ define float @f_used_out_of_tree(<2 x float> %x) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]]
-; CHECK-NEXT:    store float [[ADD]], float* @a, align 4
+; CHECK-NEXT:    store float [[ADD]], ptr @a, align 4
 ; CHECK-NEXT:    ret float [[TMP1]]
 ;
 ; THRESH1-LABEL: @f_used_out_of_tree(
@@ -51,7 +51,7 @@ define float @f_used_out_of_tree(<2 x float> %x) {
 ; THRESH1-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
 ; THRESH1-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
 ; THRESH1-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]]
-; THRESH1-NEXT:    store float [[ADD]], float* @a, align 4
+; THRESH1-NEXT:    store float [[ADD]], ptr @a, align 4
 ; THRESH1-NEXT:    ret float [[TMP1]]
 ;
 ; THRESH2-LABEL: @f_used_out_of_tree(
@@ -60,7 +60,7 @@ define float @f_used_out_of_tree(<2 x float> %x) {
 ; THRESH2-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
 ; THRESH2-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
 ; THRESH2-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]]
-; THRESH2-NEXT:    store float [[ADD]], float* @a, align 4
+; THRESH2-NEXT:    store float [[ADD]], ptr @a, align 4
 ; THRESH2-NEXT:    ret float [[TMP1]]
 ;
   %x0 = extractelement <2 x float> %x, i32 0
@@ -68,7 +68,7 @@ define float @f_used_out_of_tree(<2 x float> %x) {
   %x0x0 = fmul float %x0, %x0
   %x1x1 = fmul float %x1, %x1
   %add = fadd float %x0x0, %x1x1
-  store float %add, float* @a
+  store float %add, ptr @a
   ret float %x0
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fabs.ll b/llvm/test/Transforms/SLPVectorizer/X86/fabs.ll
index 821a1adfb2e0d..e91ce17120b49 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/fabs.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/fabs.ll
@@ -22,90 +22,90 @@ declare double @llvm.fabs.f64(double)
 
 define void @fabs_2f64() #0 {
 ; CHECK-LABEL: @fabs_2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP1]])
-; CHECK-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 8
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+  %a0 = load double, ptr @src64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
   %fabs0 = call double @llvm.fabs.f64(double %a0)
   %fabs1 = call double @llvm.fabs.f64(double %a1)
-  store double %fabs0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %fabs1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %fabs0, ptr @dst64, align 8
+  store double %fabs1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @fabs_4f64() #0 {
 ; SSE-LABEL: @fabs_4f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 8
 ; SSE-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP1]])
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 8
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP4:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP3]])
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @fabs_4f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
 ; AVX-NEXT:    [[TMP2:%.*]] = call <4 x double> @llvm.fabs.v4f64(<4 x double> [[TMP1]])
-; AVX-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 8
 ; AVX-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+  %a0 = load double, ptr @src64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
   %fabs0 = call double @llvm.fabs.f64(double %a0)
   %fabs1 = call double @llvm.fabs.f64(double %a1)
   %fabs2 = call double @llvm.fabs.f64(double %a2)
   %fabs3 = call double @llvm.fabs.f64(double %a3)
-  store double %fabs0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %fabs1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %fabs2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-  store double %fabs3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %fabs0, ptr @dst64, align 8
+  store double %fabs1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %fabs2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+  store double %fabs3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
   ret void
 }
 
 define void @fabs_8f64() #0 {
 ; SSE-LABEL: @fabs_8f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 4
 ; SSE-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP1]])
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 4
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP3]])
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <2 x double>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP5]])
-; SSE-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6) to <2 x double>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 4
 ; SSE-NEXT:    [[TMP8:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP7]])
-; SSE-NEXT:    store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @fabs_8f64(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 4
+; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 4
 ; AVX256-NEXT:    [[TMP2:%.*]] = call <4 x double> @llvm.fabs.v4f64(<4 x double> [[TMP1]])
-; AVX256-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 4
-; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 4
+; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 4
+; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 4
 ; AVX256-NEXT:    [[TMP4:%.*]] = call <4 x double> @llvm.fabs.v4f64(<4 x double> [[TMP3]])
-; AVX256-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 4
+; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @fabs_8f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @src64, align 4
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <8 x double> @llvm.fabs.v8f64(<8 x double> [[TMP1]])
-; AVX512-NEXT:    store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 4
+; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 4
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 4
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 4
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 4
-  %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 4
-  %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 4
-  %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 4
-  %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 4
+  %a0 = load double, ptr @src64, align 4
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 4
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 4
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 4
+  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 4
+  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 4
+  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 4
+  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 4
   %fabs0 = call double @llvm.fabs.f64(double %a0)
   %fabs1 = call double @llvm.fabs.f64(double %a1)
   %fabs2 = call double @llvm.fabs.f64(double %a2)
@@ -114,63 +114,63 @@ define void @fabs_8f64() #0 {
   %fabs5 = call double @llvm.fabs.f64(double %a5)
   %fabs6 = call double @llvm.fabs.f64(double %a6)
   %fabs7 = call double @llvm.fabs.f64(double %a7)
-  store double %fabs0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 4
-  store double %fabs1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 4
-  store double %fabs2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 4
-  store double %fabs3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 4
-  store double %fabs4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 4
-  store double %fabs5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 4
-  store double %fabs6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 4
-  store double %fabs7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 4
+  store double %fabs0, ptr @dst64, align 4
+  store double %fabs1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 4
+  store double %fabs2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 4
+  store double %fabs3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 4
+  store double %fabs4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
+  store double %fabs5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 4
+  store double %fabs6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 4
+  store double %fabs7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 4
   ret void
 }
 
 define void @fabs_4f32() #0 {
 ; CHECK-LABEL: @fabs_4f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP1]])
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+  %a0 = load float, ptr @src32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
   %fabs0 = call float @llvm.fabs.f32(float %a0)
   %fabs1 = call float @llvm.fabs.f32(float %a1)
   %fabs2 = call float @llvm.fabs.f32(float %a2)
   %fabs3 = call float @llvm.fabs.f32(float %a3)
-  store float %fabs0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-  store float %fabs1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %fabs2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-  store float %fabs3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %fabs0, ptr @dst32, align 4
+  store float %fabs1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %fabs2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+  store float %fabs3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @fabs_8f32() #0 {
 ; SSE-LABEL: @fabs_8f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; SSE-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP1]])
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP3]])
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @fabs_8f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[TMP1]])
-; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 4
 ; AVX-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-  %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-  %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-  %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-  %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+  %a0 = load float, ptr @src32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+  %a4 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+  %a5 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+  %a6 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+  %a7 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
   %fabs0 = call float @llvm.fabs.f32(float %a0)
   %fabs1 = call float @llvm.fabs.f32(float %a1)
   %fabs2 = call float @llvm.fabs.f32(float %a2)
@@ -179,64 +179,64 @@ define void @fabs_8f32() #0 {
   %fabs5 = call float @llvm.fabs.f32(float %a5)
   %fabs6 = call float @llvm.fabs.f32(float %a6)
   %fabs7 = call float @llvm.fabs.f32(float %a7)
-  store float %fabs0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-  store float %fabs1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %fabs2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-  store float %fabs3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-  store float %fabs4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
-  store float %fabs5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-  store float %fabs6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
-  store float %fabs7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  store float %fabs0, ptr @dst32, align 4
+  store float %fabs1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %fabs2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+  store float %fabs3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+  store float %fabs4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+  store float %fabs5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+  store float %fabs6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 4
+  store float %fabs7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @fabs_16f32() #0 {
 ; SSE-LABEL: @fabs_16f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; SSE-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP1]])
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP3]])
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP5]])
-; SSE-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP8:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP7]])
-; SSE-NEXT:    store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @fabs_16f32(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX256-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[TMP1]])
-; AVX256-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
-; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX256-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 4
+; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8), align 4
 ; AVX256-NEXT:    [[TMP4:%.*]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[TMP3]])
-; AVX256-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX256-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @fabs_16f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @src32 to <16 x float>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, ptr @src32, align 4
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <16 x float> @llvm.fabs.v16f32(<16 x float> [[TMP1]])
-; AVX512-NEXT:    store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 4
+; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64  0), align 4
-  %a1  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64  1), align 4
-  %a2  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64  2), align 4
-  %a3  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64  3), align 4
-  %a4  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64  4), align 4
-  %a5  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64  5), align 4
-  %a6  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64  6), align 4
-  %a7  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64  7), align 4
-  %a8  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64  8), align 4
-  %a9  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64  9), align 4
-  %a10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 10), align 4
-  %a11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 11), align 4
-  %a12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12), align 4
-  %a13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 13), align 4
-  %a14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 14), align 4
-  %a15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 15), align 4
+  %a0  = load float, ptr @src32, align 4
+  %a1  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64  1), align 4
+  %a2  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64  2), align 4
+  %a3  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64  3), align 4
+  %a4  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64  4), align 4
+  %a5  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64  5), align 4
+  %a6  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64  6), align 4
+  %a7  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64  7), align 4
+  %a8  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64  8), align 4
+  %a9  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64  9), align 4
+  %a10 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 10), align 4
+  %a11 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 11), align 4
+  %a12 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 12), align 4
+  %a13 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 13), align 4
+  %a14 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 14), align 4
+  %a15 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 15), align 4
   %fabs0  = call float @llvm.fabs.f32(float %a0 )
   %fabs1  = call float @llvm.fabs.f32(float %a1 )
   %fabs2  = call float @llvm.fabs.f32(float %a2 )
@@ -253,22 +253,22 @@ define void @fabs_16f32() #0 {
   %fabs13 = call float @llvm.fabs.f32(float %a13)
   %fabs14 = call float @llvm.fabs.f32(float %a14)
   %fabs15 = call float @llvm.fabs.f32(float %a15)
-  store float %fabs0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  0), align 4
-  store float %fabs1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  1), align 4
-  store float %fabs2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  2), align 4
-  store float %fabs3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  3), align 4
-  store float %fabs4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  4), align 4
-  store float %fabs5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  5), align 4
-  store float %fabs6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  6), align 4
-  store float %fabs7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  7), align 4
-  store float %fabs8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  8), align 4
-  store float %fabs9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  9), align 4
-  store float %fabs10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
-  store float %fabs11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-  store float %fabs12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
-  store float %fabs13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-  store float %fabs14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
-  store float %fabs15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+  store float %fabs0 , ptr @dst32, align 4
+  store float %fabs1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  1), align 4
+  store float %fabs2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  2), align 4
+  store float %fabs3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  3), align 4
+  store float %fabs4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  4), align 4
+  store float %fabs5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  5), align 4
+  store float %fabs6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  6), align 4
+  store float %fabs7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  7), align 4
+  store float %fabs8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  8), align 4
+  store float %fabs9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  9), align 4
+  store float %fabs10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 4
+  store float %fabs11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+  store float %fabs12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
+  store float %fabs13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+  store float %fabs14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 4
+  store float %fabs15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fcopysign.ll b/llvm/test/Transforms/SLPVectorizer/X86/fcopysign.ll
index b687d9e4a07f7..c3334af088751 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/fcopysign.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/fcopysign.ll
@@ -26,115 +26,115 @@ declare double @llvm.copysign.f64(double, double)
 
 define void @fcopysign_2f64() #0 {
 ; CHECK-LABEL: @fcopysign_2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcA64 to <2 x double>*), align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcB64 to <2 x double>*), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr @dst64, align 8
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 1), align 8
-  %b0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 0), align 8
-  %b1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 1), align 8
+  %a0 = load double, ptr @srcA64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8
+  %b0 = load double, ptr @srcB64, align 8
+  %b1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 8
   %fcopysign0 = call double @llvm.copysign.f64(double %a0, double %b0)
   %fcopysign1 = call double @llvm.copysign.f64(double %a1, double %b1)
-  store double %fcopysign0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %fcopysign1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %fcopysign0, ptr @dst64, align 8
+  store double %fcopysign1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @fcopysign_4f64() #0 {
 ; SSE-LABEL: @fcopysign_4f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcA64 to <2 x double>*), align 8
-; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcB64 to <2 x double>*), align 8
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 8
+; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 8
 ; SSE-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; SSE-NEXT:    store <2 x double> [[TMP3]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
-; SSE-NEXT:    [[TMP4:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 2) to <2 x double>*), align 8
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE-NEXT:    store <2 x double> [[TMP3]], ptr @dst64, align 8
+; SSE-NEXT:    [[TMP4:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP6:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]])
-; SSE-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @fcopysign_4f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcA64 to <4 x double>*), align 8
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcB64 to <4 x double>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @srcA64, align 8
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr @srcB64, align 8
 ; AVX-NEXT:    [[TMP3:%.*]] = call <4 x double> @llvm.copysign.v4f64(<4 x double> [[TMP1]], <4 x double> [[TMP2]])
-; AVX-NEXT:    store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX-NEXT:    store <4 x double> [[TMP3]], ptr @dst64, align 8
 ; AVX-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 1), align 8
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 2), align 8
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 3), align 8
-  %b0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 0), align 8
-  %b1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 1), align 8
-  %b2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 2), align 8
-  %b3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 3), align 8
+  %a0 = load double, ptr @srcA64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 8
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 3), align 8
+  %b0 = load double, ptr @srcB64, align 8
+  %b1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 8
+  %b2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 8
+  %b3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 3), align 8
   %fcopysign0 = call double @llvm.copysign.f64(double %a0, double %b0)
   %fcopysign1 = call double @llvm.copysign.f64(double %a1, double %b1)
   %fcopysign2 = call double @llvm.copysign.f64(double %a2, double %b2)
   %fcopysign3 = call double @llvm.copysign.f64(double %a3, double %b3)
-  store double %fcopysign0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %fcopysign1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %fcopysign2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-  store double %fcopysign3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %fcopysign0, ptr @dst64, align 8
+  store double %fcopysign1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %fcopysign2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+  store double %fcopysign3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
   ret void
 }
 
 define void @fcopysign_8f64() #0 {
 ; SSE-LABEL: @fcopysign_8f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcA64 to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcB64 to <2 x double>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; SSE-NEXT:    store <2 x double> [[TMP3]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 2) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 2) to <2 x double>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP3]], ptr @dst64, align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]])
-; SSE-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 4) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 4) to <2 x double>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP8:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP9:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[TMP7]], <2 x double> [[TMP8]])
-; SSE-NEXT:    store <2 x double> [[TMP9]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP10:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 6) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP11:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 6) to <2 x double>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP9]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP10:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 6), align 4
+; SSE-NEXT:    [[TMP11:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 6), align 4
 ; SSE-NEXT:    [[TMP12:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[TMP10]], <2 x double> [[TMP11]])
-; SSE-NEXT:    store <2 x double> [[TMP12]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP12]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @fcopysign_8f64(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcA64 to <4 x double>*), align 4
-; AVX256-NEXT:    [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcB64 to <4 x double>*), align 4
+; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @srcA64, align 4
+; AVX256-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr @srcB64, align 4
 ; AVX256-NEXT:    [[TMP3:%.*]] = call <4 x double> @llvm.copysign.v4f64(<4 x double> [[TMP1]], <4 x double> [[TMP2]])
-; AVX256-NEXT:    store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 4
-; AVX256-NEXT:    [[TMP4:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 4) to <4 x double>*), align 4
-; AVX256-NEXT:    [[TMP5:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 4) to <4 x double>*), align 4
+; AVX256-NEXT:    store <4 x double> [[TMP3]], ptr @dst64, align 4
+; AVX256-NEXT:    [[TMP4:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 4), align 4
+; AVX256-NEXT:    [[TMP5:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 4), align 4
 ; AVX256-NEXT:    [[TMP6:%.*]] = call <4 x double> @llvm.copysign.v4f64(<4 x double> [[TMP4]], <4 x double> [[TMP5]])
-; AVX256-NEXT:    store <4 x double> [[TMP6]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 4
+; AVX256-NEXT:    store <4 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @fcopysign_8f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @srcA64 to <8 x double>*), align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @srcB64 to <8 x double>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @srcA64, align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x double>, ptr @srcB64, align 4
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <8 x double> @llvm.copysign.v8f64(<8 x double> [[TMP1]], <8 x double> [[TMP2]])
-; AVX512-NEXT:    store <8 x double> [[TMP3]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 4
+; AVX512-NEXT:    store <8 x double> [[TMP3]], ptr @dst64, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 4
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 1), align 4
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 2), align 4
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 3), align 4
-  %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 4), align 4
-  %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 5), align 4
-  %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 6), align 4
-  %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 7), align 4
-  %b0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 0), align 4
-  %b1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 1), align 4
-  %b2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 2), align 4
-  %b3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 3), align 4
-  %b4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 4), align 4
-  %b5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 5), align 4
-  %b6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 6), align 4
-  %b7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 7), align 4
+  %a0 = load double, ptr @srcA64, align 4
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 4
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 4
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 3), align 4
+  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 4), align 4
+  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 5), align 4
+  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 6), align 4
+  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 7), align 4
+  %b0 = load double, ptr @srcB64, align 4
+  %b1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 4
+  %b2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 4
+  %b3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 3), align 4
+  %b4 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 4), align 4
+  %b5 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 5), align 4
+  %b6 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 6), align 4
+  %b7 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 7), align 4
   %fcopysign0 = call double @llvm.copysign.f64(double %a0, double %b0)
   %fcopysign1 = call double @llvm.copysign.f64(double %a1, double %b1)
   %fcopysign2 = call double @llvm.copysign.f64(double %a2, double %b2)
@@ -143,79 +143,79 @@ define void @fcopysign_8f64() #0 {
   %fcopysign5 = call double @llvm.copysign.f64(double %a5, double %b5)
   %fcopysign6 = call double @llvm.copysign.f64(double %a6, double %b6)
   %fcopysign7 = call double @llvm.copysign.f64(double %a7, double %b7)
-  store double %fcopysign0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 4
-  store double %fcopysign1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 4
-  store double %fcopysign2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 4
-  store double %fcopysign3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 4
-  store double %fcopysign4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 4
-  store double %fcopysign5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 4
-  store double %fcopysign6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 4
-  store double %fcopysign7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 4
+  store double %fcopysign0, ptr @dst64, align 4
+  store double %fcopysign1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 4
+  store double %fcopysign2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 4
+  store double %fcopysign3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 4
+  store double %fcopysign4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
+  store double %fcopysign5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 4
+  store double %fcopysign6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 4
+  store double %fcopysign7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 4
   ret void
 }
 
 define void @fcopysign_4f32() #0 {
 ; CHECK-LABEL: @fcopysign_4f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcA32 to <4 x float>*), align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcB32 to <4 x float>*), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @srcA32, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @srcB32, align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT:    store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr @dst32, align 4
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 3), align 4
-  %b0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 0), align 4
-  %b1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 1), align 4
-  %b2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 2), align 4
-  %b3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 3), align 4
+  %a0 = load float, ptr @srcA32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 3), align 4
+  %b0 = load float, ptr @srcB32, align 4
+  %b1 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 1), align 4
+  %b2 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 2), align 4
+  %b3 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 3), align 4
   %fcopysign0 = call float @llvm.copysign.f32(float %a0, float %b0)
   %fcopysign1 = call float @llvm.copysign.f32(float %a1, float %b1)
   %fcopysign2 = call float @llvm.copysign.f32(float %a2, float %b2)
   %fcopysign3 = call float @llvm.copysign.f32(float %a3, float %b3)
-  store float %fcopysign0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-  store float %fcopysign1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %fcopysign2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-  store float %fcopysign3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %fcopysign0, ptr @dst32, align 4
+  store float %fcopysign1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %fcopysign2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+  store float %fcopysign3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @fcopysign_8f32() #0 {
 ; SSE-LABEL: @fcopysign_8f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcA32 to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcB32 to <4 x float>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @srcA32, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @srcB32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; SSE-NEXT:    store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 4) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP3]], ptr @dst32, align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]])
-; SSE-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @fcopysign_8f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcA32 to <8 x float>*), align 4
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcB32 to <8 x float>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @srcA32, align 4
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x float>, ptr @srcB32, align 4
 ; AVX-NEXT:    [[TMP3:%.*]] = call <8 x float> @llvm.copysign.v8f32(<8 x float> [[TMP1]], <8 x float> [[TMP2]])
-; AVX-NEXT:    store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX-NEXT:    store <8 x float> [[TMP3]], ptr @dst32, align 4
 ; AVX-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 3), align 4
-  %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 4), align 4
-  %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 5), align 4
-  %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 6), align 4
-  %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 7), align 4
-  %b0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 0), align 4
-  %b1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 1), align 4
-  %b2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 2), align 4
-  %b3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 3), align 4
-  %b4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 4), align 4
-  %b5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 5), align 4
-  %b6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 6), align 4
-  %b7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 7), align 4
+  %a0 = load float, ptr @srcA32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 3), align 4
+  %a4 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 4), align 4
+  %a5 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 5), align 4
+  %a6 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 6), align 4
+  %a7 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 7), align 4
+  %b0 = load float, ptr @srcB32, align 4
+  %b1 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 1), align 4
+  %b2 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 2), align 4
+  %b3 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 3), align 4
+  %b4 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 4), align 4
+  %b5 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 5), align 4
+  %b6 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 6), align 4
+  %b7 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 7), align 4
   %fcopysign0 = call float @llvm.copysign.f32(float %a0, float %b0)
   %fcopysign1 = call float @llvm.copysign.f32(float %a1, float %b1)
   %fcopysign2 = call float @llvm.copysign.f32(float %a2, float %b2)
@@ -224,87 +224,87 @@ define void @fcopysign_8f32() #0 {
   %fcopysign5 = call float @llvm.copysign.f32(float %a5, float %b5)
   %fcopysign6 = call float @llvm.copysign.f32(float %a6, float %b6)
   %fcopysign7 = call float @llvm.copysign.f32(float %a7, float %b7)
-  store float %fcopysign0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-  store float %fcopysign1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %fcopysign2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-  store float %fcopysign3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-  store float %fcopysign4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
-  store float %fcopysign5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-  store float %fcopysign6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
-  store float %fcopysign7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  store float %fcopysign0, ptr @dst32, align 4
+  store float %fcopysign1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %fcopysign2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+  store float %fcopysign3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+  store float %fcopysign4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+  store float %fcopysign5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+  store float %fcopysign6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 4
+  store float %fcopysign7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @fcopysign_16f32() #0 {
 ; SSE-LABEL: @fcopysign_16f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcA32 to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcB32 to <4 x float>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @srcA32, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @srcB32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; SSE-NEXT:    store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 4) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP3]], ptr @dst32, align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]])
-; SSE-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 8) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 8) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP8:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP9:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[TMP7]], <4 x float> [[TMP8]])
-; SSE-NEXT:    store <4 x float> [[TMP9]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP10:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 12) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP11:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP9]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP10:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 12), align 4
+; SSE-NEXT:    [[TMP11:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP12:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[TMP10]], <4 x float> [[TMP11]])
-; SSE-NEXT:    store <4 x float> [[TMP12]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP12]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @fcopysign_16f32(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcA32 to <8 x float>*), align 4
-; AVX256-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcB32 to <8 x float>*), align 4
+; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @srcA32, align 4
+; AVX256-NEXT:    [[TMP2:%.*]] = load <8 x float>, ptr @srcB32, align 4
 ; AVX256-NEXT:    [[TMP3:%.*]] = call <8 x float> @llvm.copysign.v8f32(<8 x float> [[TMP1]], <8 x float> [[TMP2]])
-; AVX256-NEXT:    store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
-; AVX256-NEXT:    [[TMP4:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 8) to <8 x float>*), align 4
-; AVX256-NEXT:    [[TMP5:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX256-NEXT:    store <8 x float> [[TMP3]], ptr @dst32, align 4
+; AVX256-NEXT:    [[TMP4:%.*]] = load <8 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 8), align 4
+; AVX256-NEXT:    [[TMP5:%.*]] = load <8 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 8), align 4
 ; AVX256-NEXT:    [[TMP6:%.*]] = call <8 x float> @llvm.copysign.v8f32(<8 x float> [[TMP4]], <8 x float> [[TMP5]])
-; AVX256-NEXT:    store <8 x float> [[TMP6]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX256-NEXT:    store <8 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @fcopysign_16f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @srcA32 to <16 x float>*), align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @srcB32 to <16 x float>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, ptr @srcA32, align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x float>, ptr @srcB32, align 4
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <16 x float> @llvm.copysign.v16f32(<16 x float> [[TMP1]], <16 x float> [[TMP2]])
-; AVX512-NEXT:    store <16 x float> [[TMP3]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 4
+; AVX512-NEXT:    store <16 x float> [[TMP3]], ptr @dst32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  0), align 4
-  %a1  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  1), align 4
-  %a2  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  2), align 4
-  %a3  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  3), align 4
-  %a4  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  4), align 4
-  %a5  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  5), align 4
-  %a6  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  6), align 4
-  %a7  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  7), align 4
-  %a8  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  8), align 4
-  %a9  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  9), align 4
-  %a10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 10), align 4
-  %a11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 11), align 4
-  %a12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 12), align 4
-  %a13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 13), align 4
-  %a14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 14), align 4
-  %a15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 15), align 4
-  %b0  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  0), align 4
-  %b1  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  1), align 4
-  %b2  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  2), align 4
-  %b3  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  3), align 4
-  %b4  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  4), align 4
-  %b5  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  5), align 4
-  %b6  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  6), align 4
-  %b7  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  7), align 4
-  %b8  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  8), align 4
-  %b9  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  9), align 4
-  %b10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 10), align 4
-  %b11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 11), align 4
-  %b12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 12), align 4
-  %b13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 13), align 4
-  %b14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 14), align 4
-  %b15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 15), align 4
+  %a0  = load float, ptr @srcA32, align 4
+  %a1  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  1), align 4
+  %a2  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  2), align 4
+  %a3  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  3), align 4
+  %a4  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  4), align 4
+  %a5  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  5), align 4
+  %a6  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  6), align 4
+  %a7  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  7), align 4
+  %a8  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  8), align 4
+  %a9  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  9), align 4
+  %a10 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 10), align 4
+  %a11 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 11), align 4
+  %a12 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 12), align 4
+  %a13 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 13), align 4
+  %a14 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 14), align 4
+  %a15 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 15), align 4
+  %b0  = load float, ptr @srcB32, align 4
+  %b1  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  1), align 4
+  %b2  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  2), align 4
+  %b3  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  3), align 4
+  %b4  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  4), align 4
+  %b5  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  5), align 4
+  %b6  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  6), align 4
+  %b7  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  7), align 4
+  %b8  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  8), align 4
+  %b9  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  9), align 4
+  %b10 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 10), align 4
+  %b11 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 11), align 4
+  %b12 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 12), align 4
+  %b13 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 13), align 4
+  %b14 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 14), align 4
+  %b15 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 15), align 4
   %fcopysign0  = call float @llvm.copysign.f32(float %a0 , float %b0 )
   %fcopysign1  = call float @llvm.copysign.f32(float %a1 , float %b1 )
   %fcopysign2  = call float @llvm.copysign.f32(float %a2 , float %b2 )
@@ -321,22 +321,22 @@ define void @fcopysign_16f32() #0 {
   %fcopysign13 = call float @llvm.copysign.f32(float %a13, float %b13)
   %fcopysign14 = call float @llvm.copysign.f32(float %a14, float %b14)
   %fcopysign15 = call float @llvm.copysign.f32(float %a15, float %b15)
-  store float %fcopysign0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  0), align 4
-  store float %fcopysign1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  1), align 4
-  store float %fcopysign2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  2), align 4
-  store float %fcopysign3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  3), align 4
-  store float %fcopysign4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  4), align 4
-  store float %fcopysign5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  5), align 4
-  store float %fcopysign6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  6), align 4
-  store float %fcopysign7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  7), align 4
-  store float %fcopysign8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  8), align 4
-  store float %fcopysign9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  9), align 4
-  store float %fcopysign10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
-  store float %fcopysign11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-  store float %fcopysign12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
-  store float %fcopysign13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-  store float %fcopysign14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
-  store float %fcopysign15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+  store float %fcopysign0 , ptr @dst32, align 4
+  store float %fcopysign1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  1), align 4
+  store float %fcopysign2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  2), align 4
+  store float %fcopysign3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  3), align 4
+  store float %fcopysign4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  4), align 4
+  store float %fcopysign5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  5), align 4
+  store float %fcopysign6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  6), align 4
+  store float %fcopysign7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  7), align 4
+  store float %fcopysign8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  8), align 4
+  store float %fcopysign9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  9), align 4
+  store float %fcopysign10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 4
+  store float %fcopysign11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+  store float %fcopysign12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
+  store float %fcopysign13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+  store float %fcopysign14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 4
+  store float %fcopysign15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
   ret void
 }
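
The rewrite applied throughout the copysign tests above is entirely mechanical: every typed pointer operand becomes a plain ptr, the <N x type>* bitcasts that typed pointers forced onto vector loads and stores disappear, and a getelementptr addressing element 0 of a global folds away so the global is used directly. A minimal before/after sketch of the pattern, assuming a hypothetical global @src of type [8 x double] (illustrative only, not taken from any file in this commit):

  ; before (typed pointers): reaching the first vector lane requires a
  ; constant-expression bitcast of the hypothetical global @src
  %v = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src to <2 x double>*), align 8

  ; after (opaque pointers): the global itself is the pointer, so the
  ; bitcast and any zero-index getelementptr simply fold away
  %v = load <2 x double>, ptr @src, align 8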
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/flag.ll b/llvm/test/Transforms/SLPVectorizer/X86/flag.ll
index 084975e98c50e..1a7993b99826a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/flag.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/flag.ll
@@ -5,24 +5,24 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-macosx10.8.0"
 
 ; Check that the command line flag works.
-define i32 @rollable(i32* noalias nocapture %in, i32* noalias nocapture %out, i64 %n) {
+define i32 @rollable(ptr noalias nocapture %in, ptr noalias nocapture %out, i64 %n) {
 ; CHECK-LABEL: @rollable(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[N:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[DOT_CRIT_EDGE:%.*]], label [[DOTLR_PH:%.*]]
 ; CHECK:       .lr.ph:
 ; CHECK-NEXT:    [[I_019:%.*]] = phi i64 [ [[TMP26:%.*]], [[DOTLR_PH]] ], [ 0, [[TMP0:%.*]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[I_019]], 2
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = or i64 [[TMP2]], 1
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[IN]], i64 [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP2]], 2
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[IN]], i64 [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
 ; CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP2]], 3
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[IN]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = mul i32 [[TMP4]], 7
 ; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP14]], 7
 ; CHECK-NEXT:    [[TMP16:%.*]] = mul i32 [[TMP7]], 7
@@ -31,14 +31,14 @@ define i32 @rollable(i32* noalias nocapture %in, i32* noalias nocapture %out, i6
 ; CHECK-NEXT:    [[TMP19:%.*]] = add i32 [[TMP18]], 21
 ; CHECK-NEXT:    [[TMP20:%.*]] = mul i32 [[TMP13]], 7
 ; CHECK-NEXT:    [[TMP21:%.*]] = add i32 [[TMP20]], 28
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 [[TMP2]]
-; CHECK-NEXT:    store i32 [[TMP15]], i32* [[TMP22]], align 4
-; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 [[TMP5]]
-; CHECK-NEXT:    store i32 [[TMP17]], i32* [[TMP23]], align 4
-; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 [[TMP8]]
-; CHECK-NEXT:    store i32 [[TMP19]], i32* [[TMP24]], align 4
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 [[TMP11]]
-; CHECK-NEXT:    store i32 [[TMP21]], i32* [[TMP25]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 [[TMP2]]
+; CHECK-NEXT:    store i32 [[TMP15]], ptr [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 [[TMP5]]
+; CHECK-NEXT:    store i32 [[TMP17]], ptr [[TMP23]], align 4
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 [[TMP8]]
+; CHECK-NEXT:    store i32 [[TMP19]], ptr [[TMP24]], align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 [[TMP11]]
+; CHECK-NEXT:    store i32 [[TMP21]], ptr [[TMP25]], align 4
 ; CHECK-NEXT:    [[TMP26]] = add i64 [[I_019]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[TMP26]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]]
@@ -51,17 +51,17 @@ define i32 @rollable(i32* noalias nocapture %in, i32* noalias nocapture %out, i6
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %i.019 = phi i64 [ %26, %.lr.ph ], [ 0, %0 ]
   %2 = shl i64 %i.019, 2
-  %3 = getelementptr inbounds i32, i32* %in, i64 %2
-  %4 = load i32, i32* %3, align 4
+  %3 = getelementptr inbounds i32, ptr %in, i64 %2
+  %4 = load i32, ptr %3, align 4
   %5 = or i64 %2, 1
-  %6 = getelementptr inbounds i32, i32* %in, i64 %5
-  %7 = load i32, i32* %6, align 4
+  %6 = getelementptr inbounds i32, ptr %in, i64 %5
+  %7 = load i32, ptr %6, align 4
   %8 = or i64 %2, 2
-  %9 = getelementptr inbounds i32, i32* %in, i64 %8
-  %10 = load i32, i32* %9, align 4
+  %9 = getelementptr inbounds i32, ptr %in, i64 %8
+  %10 = load i32, ptr %9, align 4
   %11 = or i64 %2, 3
-  %12 = getelementptr inbounds i32, i32* %in, i64 %11
-  %13 = load i32, i32* %12, align 4
+  %12 = getelementptr inbounds i32, ptr %in, i64 %11
+  %13 = load i32, ptr %12, align 4
   %14 = mul i32 %4, 7
   %15 = add i32 %14, 7
   %16 = mul i32 %7, 7
@@ -70,14 +70,14 @@ define i32 @rollable(i32* noalias nocapture %in, i32* noalias nocapture %out, i6
   %19 = add i32 %18, 21
   %20 = mul i32 %13, 7
   %21 = add i32 %20, 28
-  %22 = getelementptr inbounds i32, i32* %out, i64 %2
-  store i32 %15, i32* %22, align 4
-  %23 = getelementptr inbounds i32, i32* %out, i64 %5
-  store i32 %17, i32* %23, align 4
-  %24 = getelementptr inbounds i32, i32* %out, i64 %8
-  store i32 %19, i32* %24, align 4
-  %25 = getelementptr inbounds i32, i32* %out, i64 %11
-  store i32 %21, i32* %25, align 4
+  %22 = getelementptr inbounds i32, ptr %out, i64 %2
+  store i32 %15, ptr %22, align 4
+  %23 = getelementptr inbounds i32, ptr %out, i64 %5
+  store i32 %17, ptr %23, align 4
+  %24 = getelementptr inbounds i32, ptr %out, i64 %8
+  store i32 %19, ptr %24, align 4
+  %25 = getelementptr inbounds i32, ptr %out, i64 %11
+  store i32 %21, ptr %25, align 4
   %26 = add i64 %i.019, 1
   %exitcond = icmp eq i64 %26, %n
   br i1 %exitcond, label %._crit_edge, label %.lr.ph

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/float-min-max.ll b/llvm/test/Transforms/SLPVectorizer/X86/float-min-max.ll
index 123608d4bb338..79c7b053a0152 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/float-min-max.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/float-min-max.ll
@@ -5,37 +5,31 @@ target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f8
 target triple = "i386-unknown-linux-gnu"
 
 ; Make sure we do not crash while computing the cost for @test.
-define i1 @test(float* %p1, float* %p2, i8* %p3, i1 %c) #0 {
+define i1 @test(ptr %p1, ptr %p2, ptr %p3, i1 %c) #0 {
 ; CHECK-LABEL: @test(
-; CHECK-NEXT:    [[SCEVGEP21:%.*]] = getelementptr float, float* [[P1:%.*]], i32 0
-; CHECK-NEXT:    [[L0:%.*]] = icmp ult float* [[P2:%.*]], [[SCEVGEP21]]
-; CHECK-NEXT:    [[UMIN:%.*]] = select i1 [[L0]], float* [[P2]], float* [[SCEVGEP21]]
-; CHECK-NEXT:    [[UMIN22:%.*]] = bitcast float* [[UMIN]] to i8*
-; CHECK-NEXT:    [[SCEVGEP31:%.*]] = getelementptr float, float* [[P1]], i32 1
-; CHECK-NEXT:    [[L1:%.*]] = icmp ult float* [[SCEVGEP31]], [[P2]]
-; CHECK-NEXT:    [[UMIN33:%.*]] = select i1 [[L1]], float* [[SCEVGEP31]], float* [[P2]]
-; CHECK-NEXT:    [[UMIN3334:%.*]] = bitcast float* [[UMIN33]] to i8*
-; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ugt i8* [[P3:%.*]], [[UMIN22]]
+; CHECK-NEXT:    [[L0:%.*]] = icmp ult ptr [[P2:%.*]], [[P1:%.*]]
+; CHECK-NEXT:    [[UMIN:%.*]] = select i1 [[L0]], ptr [[P2]], ptr [[P1]]
+; CHECK-NEXT:    [[SCEVGEP31:%.*]] = getelementptr float, ptr [[P1]], i32 1
+; CHECK-NEXT:    [[L1:%.*]] = icmp ult ptr [[SCEVGEP31]], [[P2]]
+; CHECK-NEXT:    [[UMIN33:%.*]] = select i1 [[L1]], ptr [[SCEVGEP31]], ptr [[P2]]
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ugt ptr [[P3:%.*]], [[UMIN]]
 ; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[C:%.*]]
 ; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[C]]
-; CHECK-NEXT:    [[BOUND042:%.*]] = icmp ugt i8* [[P3]], [[UMIN3334]]
+; CHECK-NEXT:    [[BOUND042:%.*]] = icmp ugt ptr [[P3]], [[UMIN33]]
 ; CHECK-NEXT:    [[FOUND_CONFLICT44:%.*]] = and i1 [[BOUND042]], [[C]]
 ; CHECK-NEXT:    [[CONFLICT_RDX45:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT44]]
 ; CHECK-NEXT:    [[CONFLICT_RDX49:%.*]] = or i1 [[CONFLICT_RDX45]], [[C]]
 ; CHECK-NEXT:    ret i1 [[CONFLICT_RDX49]]
 ;
-  %scevgep21 = getelementptr float, float* %p1, i32 0
-  %l0 = icmp ult float* %p2, %scevgep21
-  %umin = select i1 %l0, float* %p2, float* %scevgep21
-  %umin22 = bitcast float* %umin to i8*
-  %scevgep31 = getelementptr float, float* %p1, i32 1
-  %l1 = icmp ult float* %scevgep31, %p2
-  %umin33 = select i1 %l1, float* %scevgep31, float* %p2
-  %umin3334 = bitcast float* %umin33 to i8*
-  %bound0 = icmp ugt i8* %p3, %umin22
+  %l0 = icmp ult ptr %p2, %p1
+  %umin = select i1 %l0, ptr %p2, ptr %p1
+  %scevgep31 = getelementptr float, ptr %p1, i32 1
+  %l1 = icmp ult ptr %scevgep31, %p2
+  %umin33 = select i1 %l1, ptr %scevgep31, ptr %p2
+  %bound0 = icmp ugt ptr %p3, %umin
   %found.conflict = and i1 %bound0, %c
   %conflict.rdx = or i1 %found.conflict, %c
-  %bound042 = icmp ugt i8* %p3, %umin3334
+  %bound042 = icmp ugt ptr %p3, %umin33
   %found.conflict44 = and i1 %bound042, %c
   %conflict.rdx45 = or i1 %conflict.rdx, %found.conflict44
   %conflict.rdx49 = or i1 %conflict.rdx45, %c
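
The float-min-max.ll hunk above shows a second effect of the migration: with a single opaque pointer type, pointer-to-pointer bitcasts such as float* to i8* become no-ops and are deleted outright, as is the zero-offset getelementptr of %p1, which is why whole CHECK lines vanish rather than merely being respelled. A condensed restatement of that hunk's before/after shape, using the test's own %p1/%p2/%p3 values (a sketch, not the verbatim test):

  ; before (typed pointers): the pointer min needs a zero-offset GEP, and a
  ; bitcast to i8* before it can be compared against an i8* bound
  %g = getelementptr float, float* %p1, i32 0
  %l = icmp ult float* %p2, %g
  %umin = select i1 %l, float* %p2, float* %g
  %umin.i8 = bitcast float* %umin to i8*
  %bound = icmp ugt i8* %p3, %umin.i8

  ; after (opaque pointers): the same logic with no casts at all
  %l = icmp ult ptr %p2, %p1
  %umin = select i1 %l, ptr %p2, ptr %p1
  %bound = icmp ugt ptr %p3, %umin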

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/fma.ll b/llvm/test/Transforms/SLPVectorizer/X86/fma.ll
index be5d064be6d4b..e7d51eb805e74 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/fma.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/fma.ll
@@ -26,120 +26,120 @@ declare double @llvm.fma.f64(double, double, double)
 
 define void @fma_2f64() #0 {
 ; NO-FMA-LABEL: @fma_2f64(
-; NO-FMA-NEXT:    [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 8
-; NO-FMA-NEXT:    [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 1), align 8
-; NO-FMA-NEXT:    [[B0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 0), align 8
-; NO-FMA-NEXT:    [[B1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 1), align 8
-; NO-FMA-NEXT:    [[C0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 0), align 8
-; NO-FMA-NEXT:    [[C1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 1), align 8
+; NO-FMA-NEXT:    [[A0:%.*]] = load double, ptr @srcA64, align 8
+; NO-FMA-NEXT:    [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8
+; NO-FMA-NEXT:    [[B0:%.*]] = load double, ptr @srcB64, align 8
+; NO-FMA-NEXT:    [[B1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 8
+; NO-FMA-NEXT:    [[C0:%.*]] = load double, ptr @srcC64, align 8
+; NO-FMA-NEXT:    [[C1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 1), align 8
 ; NO-FMA-NEXT:    [[FMA0:%.*]] = call double @llvm.fma.f64(double [[A0]], double [[B0]], double [[C0]])
 ; NO-FMA-NEXT:    [[FMA1:%.*]] = call double @llvm.fma.f64(double [[A1]], double [[B1]], double [[C1]])
-; NO-FMA-NEXT:    store double [[FMA0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-; NO-FMA-NEXT:    store double [[FMA1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; NO-FMA-NEXT:    store double [[FMA0]], ptr @dst64, align 8
+; NO-FMA-NEXT:    store double [[FMA1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
 ; NO-FMA-NEXT:    ret void
 ;
 ; FMA-LABEL: @fma_2f64(
-; FMA-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcA64 to <2 x double>*), align 8
-; FMA-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcB64 to <2 x double>*), align 8
-; FMA-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcC64 to <2 x double>*), align 8
+; FMA-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 8
+; FMA-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 8
+; FMA-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr @srcC64, align 8
 ; FMA-NEXT:    [[TMP4:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x double> [[TMP3]])
-; FMA-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; FMA-NEXT:    store <2 x double> [[TMP4]], ptr @dst64, align 8
 ; FMA-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 1), align 8
-  %b0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 0), align 8
-  %b1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 1), align 8
-  %c0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 0), align 8
-  %c1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 1), align 8
+  %a0 = load double, ptr @srcA64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8
+  %b0 = load double, ptr @srcB64, align 8
+  %b1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 8
+  %c0 = load double, ptr @srcC64, align 8
+  %c1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 1), align 8
   %fma0 = call double @llvm.fma.f64(double %a0, double %b0, double %c0)
   %fma1 = call double @llvm.fma.f64(double %a1, double %b1, double %c1)
-  store double %fma0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %fma1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %fma0, ptr @dst64, align 8
+  store double %fma1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @fma_4f64() #0 {
 ; NO-FMA-LABEL: @fma_4f64(
-; NO-FMA-NEXT:    [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 8
-; NO-FMA-NEXT:    [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 1), align 8
-; NO-FMA-NEXT:    [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 2), align 8
-; NO-FMA-NEXT:    [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 3), align 8
-; NO-FMA-NEXT:    [[B0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 0), align 8
-; NO-FMA-NEXT:    [[B1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 1), align 8
-; NO-FMA-NEXT:    [[B2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 2), align 8
-; NO-FMA-NEXT:    [[B3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 3), align 8
-; NO-FMA-NEXT:    [[C0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 0), align 8
-; NO-FMA-NEXT:    [[C1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 1), align 8
-; NO-FMA-NEXT:    [[C2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 2), align 8
-; NO-FMA-NEXT:    [[C3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 3), align 8
+; NO-FMA-NEXT:    [[A0:%.*]] = load double, ptr @srcA64, align 8
+; NO-FMA-NEXT:    [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8
+; NO-FMA-NEXT:    [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 8
+; NO-FMA-NEXT:    [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 3), align 8
+; NO-FMA-NEXT:    [[B0:%.*]] = load double, ptr @srcB64, align 8
+; NO-FMA-NEXT:    [[B1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 8
+; NO-FMA-NEXT:    [[B2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 8
+; NO-FMA-NEXT:    [[B3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 3), align 8
+; NO-FMA-NEXT:    [[C0:%.*]] = load double, ptr @srcC64, align 8
+; NO-FMA-NEXT:    [[C1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 1), align 8
+; NO-FMA-NEXT:    [[C2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 2), align 8
+; NO-FMA-NEXT:    [[C3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 3), align 8
 ; NO-FMA-NEXT:    [[FMA0:%.*]] = call double @llvm.fma.f64(double [[A0]], double [[B0]], double [[C0]])
 ; NO-FMA-NEXT:    [[FMA1:%.*]] = call double @llvm.fma.f64(double [[A1]], double [[B1]], double [[C1]])
 ; NO-FMA-NEXT:    [[FMA2:%.*]] = call double @llvm.fma.f64(double [[A2]], double [[B2]], double [[C2]])
 ; NO-FMA-NEXT:    [[FMA3:%.*]] = call double @llvm.fma.f64(double [[A3]], double [[B3]], double [[C3]])
-; NO-FMA-NEXT:    store double [[FMA0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-; NO-FMA-NEXT:    store double [[FMA1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; NO-FMA-NEXT:    store double [[FMA2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-; NO-FMA-NEXT:    store double [[FMA3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; NO-FMA-NEXT:    store double [[FMA0]], ptr @dst64, align 8
+; NO-FMA-NEXT:    store double [[FMA1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; NO-FMA-NEXT:    store double [[FMA2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+; NO-FMA-NEXT:    store double [[FMA3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
 ; NO-FMA-NEXT:    ret void
 ;
 ; FMA-LABEL: @fma_4f64(
-; FMA-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcA64 to <4 x double>*), align 8
-; FMA-NEXT:    [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcB64 to <4 x double>*), align 8
-; FMA-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcC64 to <4 x double>*), align 8
+; FMA-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @srcA64, align 8
+; FMA-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr @srcB64, align 8
+; FMA-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr @srcC64, align 8
 ; FMA-NEXT:    [[TMP4:%.*]] = call <4 x double> @llvm.fma.v4f64(<4 x double> [[TMP1]], <4 x double> [[TMP2]], <4 x double> [[TMP3]])
-; FMA-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; FMA-NEXT:    store <4 x double> [[TMP4]], ptr @dst64, align 8
 ; FMA-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 1), align 8
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 2), align 8
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 3), align 8
-  %b0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 0), align 8
-  %b1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 1), align 8
-  %b2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 2), align 8
-  %b3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 3), align 8
-  %c0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 0), align 8
-  %c1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 1), align 8
-  %c2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 2), align 8
-  %c3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 3), align 8
+  %a0 = load double, ptr @srcA64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 8
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 3), align 8
+  %b0 = load double, ptr @srcB64, align 8
+  %b1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 8
+  %b2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 8
+  %b3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 3), align 8
+  %c0 = load double, ptr @srcC64, align 8
+  %c1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 1), align 8
+  %c2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 2), align 8
+  %c3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 3), align 8
   %fma0 = call double @llvm.fma.f64(double %a0, double %b0, double %c0)
   %fma1 = call double @llvm.fma.f64(double %a1, double %b1, double %c1)
   %fma2 = call double @llvm.fma.f64(double %a2, double %b2, double %c2)
   %fma3 = call double @llvm.fma.f64(double %a3, double %b3, double %c3)
-  store double %fma0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %fma1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %fma2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-  store double %fma3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %fma0, ptr @dst64, align 8
+  store double %fma1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %fma2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+  store double %fma3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
   ret void
 }
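; A minimal sketch of the scalar rewrite applied throughout this file (the
; value names below are illustrative only, not taken from the test): with
; opaque pointers every pointee type collapses to `ptr`, and a constant
; `getelementptr` whose indices are all zero folds away to its base pointer,
; so element-0 accesses go through the global directly while non-zero
; offsets keep their GEP constant expression.
;
;   %v0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 8  ; typed, before
;   %v0 = load double, ptr @srcA64, align 8                                                                         ; opaque, after
;   %v1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8                ; nonzero offset survives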
 
 define void @fma_8f64() #0 {
 ; NO-FMA-LABEL: @fma_8f64(
-; NO-FMA-NEXT:    [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 4
-; NO-FMA-NEXT:    [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 1), align 4
-; NO-FMA-NEXT:    [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 2), align 4
-; NO-FMA-NEXT:    [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 3), align 4
-; NO-FMA-NEXT:    [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 4), align 4
-; NO-FMA-NEXT:    [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 5), align 4
-; NO-FMA-NEXT:    [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 6), align 4
-; NO-FMA-NEXT:    [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 7), align 4
-; NO-FMA-NEXT:    [[B0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 0), align 4
-; NO-FMA-NEXT:    [[B1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 1), align 4
-; NO-FMA-NEXT:    [[B2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 2), align 4
-; NO-FMA-NEXT:    [[B3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 3), align 4
-; NO-FMA-NEXT:    [[B4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 4), align 4
-; NO-FMA-NEXT:    [[B5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 5), align 4
-; NO-FMA-NEXT:    [[B6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 6), align 4
-; NO-FMA-NEXT:    [[B7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 7), align 4
-; NO-FMA-NEXT:    [[C0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 0), align 4
-; NO-FMA-NEXT:    [[C1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 1), align 4
-; NO-FMA-NEXT:    [[C2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 2), align 4
-; NO-FMA-NEXT:    [[C3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 3), align 4
-; NO-FMA-NEXT:    [[C4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 4), align 4
-; NO-FMA-NEXT:    [[C5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 5), align 4
-; NO-FMA-NEXT:    [[C6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 6), align 4
-; NO-FMA-NEXT:    [[C7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 7), align 4
+; NO-FMA-NEXT:    [[A0:%.*]] = load double, ptr @srcA64, align 4
+; NO-FMA-NEXT:    [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 4
+; NO-FMA-NEXT:    [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 4
+; NO-FMA-NEXT:    [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 3), align 4
+; NO-FMA-NEXT:    [[A4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 4), align 4
+; NO-FMA-NEXT:    [[A5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 5), align 4
+; NO-FMA-NEXT:    [[A6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 6), align 4
+; NO-FMA-NEXT:    [[A7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 7), align 4
+; NO-FMA-NEXT:    [[B0:%.*]] = load double, ptr @srcB64, align 4
+; NO-FMA-NEXT:    [[B1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 4
+; NO-FMA-NEXT:    [[B2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 4
+; NO-FMA-NEXT:    [[B3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 3), align 4
+; NO-FMA-NEXT:    [[B4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 4), align 4
+; NO-FMA-NEXT:    [[B5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 5), align 4
+; NO-FMA-NEXT:    [[B6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 6), align 4
+; NO-FMA-NEXT:    [[B7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 7), align 4
+; NO-FMA-NEXT:    [[C0:%.*]] = load double, ptr @srcC64, align 4
+; NO-FMA-NEXT:    [[C1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 1), align 4
+; NO-FMA-NEXT:    [[C2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 2), align 4
+; NO-FMA-NEXT:    [[C3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 3), align 4
+; NO-FMA-NEXT:    [[C4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 4), align 4
+; NO-FMA-NEXT:    [[C5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 5), align 4
+; NO-FMA-NEXT:    [[C6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 6), align 4
+; NO-FMA-NEXT:    [[C7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 7), align 4
 ; NO-FMA-NEXT:    [[FMA0:%.*]] = call double @llvm.fma.f64(double [[A0]], double [[B0]], double [[C0]])
 ; NO-FMA-NEXT:    [[FMA1:%.*]] = call double @llvm.fma.f64(double [[A1]], double [[B1]], double [[C1]])
 ; NO-FMA-NEXT:    [[FMA2:%.*]] = call double @llvm.fma.f64(double [[A2]], double [[B2]], double [[C2]])
@@ -148,61 +148,61 @@ define void @fma_8f64() #0 {
 ; NO-FMA-NEXT:    [[FMA5:%.*]] = call double @llvm.fma.f64(double [[A5]], double [[B5]], double [[C5]])
 ; NO-FMA-NEXT:    [[FMA6:%.*]] = call double @llvm.fma.f64(double [[A6]], double [[B6]], double [[C6]])
 ; NO-FMA-NEXT:    [[FMA7:%.*]] = call double @llvm.fma.f64(double [[A7]], double [[B7]], double [[C7]])
-; NO-FMA-NEXT:    store double [[FMA0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 4
-; NO-FMA-NEXT:    store double [[FMA1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 4
-; NO-FMA-NEXT:    store double [[FMA2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 4
-; NO-FMA-NEXT:    store double [[FMA3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 4
-; NO-FMA-NEXT:    store double [[FMA4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 4
-; NO-FMA-NEXT:    store double [[FMA5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 4
-; NO-FMA-NEXT:    store double [[FMA6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 4
-; NO-FMA-NEXT:    store double [[FMA7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 4
+; NO-FMA-NEXT:    store double [[FMA0]], ptr @dst64, align 4
+; NO-FMA-NEXT:    store double [[FMA1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 4
+; NO-FMA-NEXT:    store double [[FMA2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 4
+; NO-FMA-NEXT:    store double [[FMA3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 4
+; NO-FMA-NEXT:    store double [[FMA4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
+; NO-FMA-NEXT:    store double [[FMA5]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 4
+; NO-FMA-NEXT:    store double [[FMA6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 4
+; NO-FMA-NEXT:    store double [[FMA7]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 4
 ; NO-FMA-NEXT:    ret void
 ;
 ; FMA256-LABEL: @fma_8f64(
-; FMA256-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcA64 to <4 x double>*), align 4
-; FMA256-NEXT:    [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcB64 to <4 x double>*), align 4
-; FMA256-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcC64 to <4 x double>*), align 4
+; FMA256-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @srcA64, align 4
+; FMA256-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr @srcB64, align 4
+; FMA256-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr @srcC64, align 4
 ; FMA256-NEXT:    [[TMP4:%.*]] = call <4 x double> @llvm.fma.v4f64(<4 x double> [[TMP1]], <4 x double> [[TMP2]], <4 x double> [[TMP3]])
-; FMA256-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 4
-; FMA256-NEXT:    [[TMP5:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 4) to <4 x double>*), align 4
-; FMA256-NEXT:    [[TMP6:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 4) to <4 x double>*), align 4
-; FMA256-NEXT:    [[TMP7:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 4) to <4 x double>*), align 4
+; FMA256-NEXT:    store <4 x double> [[TMP4]], ptr @dst64, align 4
+; FMA256-NEXT:    [[TMP5:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 4), align 4
+; FMA256-NEXT:    [[TMP6:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 4), align 4
+; FMA256-NEXT:    [[TMP7:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 4), align 4
 ; FMA256-NEXT:    [[TMP8:%.*]] = call <4 x double> @llvm.fma.v4f64(<4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x double> [[TMP7]])
-; FMA256-NEXT:    store <4 x double> [[TMP8]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 4
+; FMA256-NEXT:    store <4 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
 ; FMA256-NEXT:    ret void
 ;
 ; FMA512-LABEL: @fma_8f64(
-; FMA512-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @srcA64 to <8 x double>*), align 4
-; FMA512-NEXT:    [[TMP2:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @srcB64 to <8 x double>*), align 4
-; FMA512-NEXT:    [[TMP3:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @srcC64 to <8 x double>*), align 4
+; FMA512-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @srcA64, align 4
+; FMA512-NEXT:    [[TMP2:%.*]] = load <8 x double>, ptr @srcB64, align 4
+; FMA512-NEXT:    [[TMP3:%.*]] = load <8 x double>, ptr @srcC64, align 4
 ; FMA512-NEXT:    [[TMP4:%.*]] = call <8 x double> @llvm.fma.v8f64(<8 x double> [[TMP1]], <8 x double> [[TMP2]], <8 x double> [[TMP3]])
-; FMA512-NEXT:    store <8 x double> [[TMP4]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 4
+; FMA512-NEXT:    store <8 x double> [[TMP4]], ptr @dst64, align 4
 ; FMA512-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 4
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 1), align 4
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 2), align 4
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 3), align 4
-  %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 4), align 4
-  %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 5), align 4
-  %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 6), align 4
-  %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 7), align 4
-  %b0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 0), align 4
-  %b1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 1), align 4
-  %b2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 2), align 4
-  %b3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 3), align 4
-  %b4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 4), align 4
-  %b5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 5), align 4
-  %b6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 6), align 4
-  %b7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 7), align 4
-  %c0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 0), align 4
-  %c1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 1), align 4
-  %c2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 2), align 4
-  %c3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 3), align 4
-  %c4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 4), align 4
-  %c5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 5), align 4
-  %c6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 6), align 4
-  %c7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 7), align 4
+  %a0 = load double, ptr @srcA64, align 4
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 4
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 4
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 3), align 4
+  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 4), align 4
+  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 5), align 4
+  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 6), align 4
+  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 7), align 4
+  %b0 = load double, ptr @srcB64, align 4
+  %b1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 4
+  %b2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 4
+  %b3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 3), align 4
+  %b4 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 4), align 4
+  %b5 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 5), align 4
+  %b6 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 6), align 4
+  %b7 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 7), align 4
+  %c0 = load double, ptr @srcC64, align 4
+  %c1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 1), align 4
+  %c2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 2), align 4
+  %c3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 3), align 4
+  %c4 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 4), align 4
+  %c5 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 5), align 4
+  %c6 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 6), align 4
+  %c7 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 7), align 4
   %fma0 = call double @llvm.fma.f64(double %a0, double %b0, double %c0)
   %fma1 = call double @llvm.fma.f64(double %a1, double %b1, double %c1)
   %fma2 = call double @llvm.fma.f64(double %a2, double %b2, double %c2)
@@ -211,98 +211,98 @@ define void @fma_8f64() #0 {
   %fma5 = call double @llvm.fma.f64(double %a5, double %b5, double %c5)
   %fma6 = call double @llvm.fma.f64(double %a6, double %b6, double %c6)
   %fma7 = call double @llvm.fma.f64(double %a7, double %b7, double %c7)
-  store double %fma0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 4
-  store double %fma1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 4
-  store double %fma2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 4
-  store double %fma3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 4
-  store double %fma4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 4
-  store double %fma5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 4
-  store double %fma6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 4
-  store double %fma7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 4
+  store double %fma0, ptr @dst64, align 4
+  store double %fma1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 4
+  store double %fma2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 4
+  store double %fma3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 4
+  store double %fma4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
+  store double %fma5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 4
+  store double %fma6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 4
+  store double %fma7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 4
   ret void
 }
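; The vectorized CHECK lines simplify for the same reason, plus one more: a
; typed vector load needed a `bitcast` constant expression to reinterpret the
; array pointer, whereas an opaque `ptr` can be loaded at any type directly,
; so the casts disappear and only genuine byte offsets (e.g. element 4 for
; the second <4 x double> half in the FMA256 output above) remain as GEPs.
; An illustrative before/after pair:
;
;   %w = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcA64 to <4 x double>*), align 4  ; typed, before
;   %w = load <4 x double>, ptr @srcA64, align 4                                                     ; opaque, after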
 
 define void @fma_4f32() #0 {
 ; NO-FMA-LABEL: @fma_4f32(
-; NO-FMA-NEXT:    [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 0), align 4
-; NO-FMA-NEXT:    [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 1), align 4
-; NO-FMA-NEXT:    [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 2), align 4
-; NO-FMA-NEXT:    [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 3), align 4
-; NO-FMA-NEXT:    [[B0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 0), align 4
-; NO-FMA-NEXT:    [[B1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 1), align 4
-; NO-FMA-NEXT:    [[B2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 2), align 4
-; NO-FMA-NEXT:    [[B3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 3), align 4
-; NO-FMA-NEXT:    [[C0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 0), align 4
-; NO-FMA-NEXT:    [[C1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 1), align 4
-; NO-FMA-NEXT:    [[C2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 2), align 4
-; NO-FMA-NEXT:    [[C3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 3), align 4
+; NO-FMA-NEXT:    [[A0:%.*]] = load float, ptr @srcA32, align 4
+; NO-FMA-NEXT:    [[A1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 1), align 4
+; NO-FMA-NEXT:    [[A2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 2), align 4
+; NO-FMA-NEXT:    [[A3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 3), align 4
+; NO-FMA-NEXT:    [[B0:%.*]] = load float, ptr @srcB32, align 4
+; NO-FMA-NEXT:    [[B1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 1), align 4
+; NO-FMA-NEXT:    [[B2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 2), align 4
+; NO-FMA-NEXT:    [[B3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 3), align 4
+; NO-FMA-NEXT:    [[C0:%.*]] = load float, ptr @srcC32, align 4
+; NO-FMA-NEXT:    [[C1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 1), align 4
+; NO-FMA-NEXT:    [[C2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 2), align 4
+; NO-FMA-NEXT:    [[C3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 3), align 4
 ; NO-FMA-NEXT:    [[FMA0:%.*]] = call float @llvm.fma.f32(float [[A0]], float [[B0]], float [[C0]])
 ; NO-FMA-NEXT:    [[FMA1:%.*]] = call float @llvm.fma.f32(float [[A1]], float [[B1]], float [[C1]])
 ; NO-FMA-NEXT:    [[FMA2:%.*]] = call float @llvm.fma.f32(float [[A2]], float [[B2]], float [[C2]])
 ; NO-FMA-NEXT:    [[FMA3:%.*]] = call float @llvm.fma.f32(float [[A3]], float [[B3]], float [[C3]])
-; NO-FMA-NEXT:    store float [[FMA0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-; NO-FMA-NEXT:    store float [[FMA1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-; NO-FMA-NEXT:    store float [[FMA2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-; NO-FMA-NEXT:    store float [[FMA3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; NO-FMA-NEXT:    store float [[FMA0]], ptr @dst32, align 4
+; NO-FMA-NEXT:    store float [[FMA1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+; NO-FMA-NEXT:    store float [[FMA2]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+; NO-FMA-NEXT:    store float [[FMA3]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
 ; NO-FMA-NEXT:    ret void
 ;
 ; FMA-LABEL: @fma_4f32(
-; FMA-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcA32 to <4 x float>*), align 4
-; FMA-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcB32 to <4 x float>*), align 4
-; FMA-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcC32 to <4 x float>*), align 4
+; FMA-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @srcA32, align 4
+; FMA-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @srcB32, align 4
+; FMA-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr @srcC32, align 4
 ; FMA-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]])
-; FMA-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; FMA-NEXT:    store <4 x float> [[TMP4]], ptr @dst32, align 4
 ; FMA-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 3), align 4
-  %b0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 0), align 4
-  %b1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 1), align 4
-  %b2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 2), align 4
-  %b3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 3), align 4
-  %c0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 0), align 4
-  %c1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 1), align 4
-  %c2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 2), align 4
-  %c3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 3), align 4
+  %a0 = load float, ptr @srcA32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 3), align 4
+  %b0 = load float, ptr @srcB32, align 4
+  %b1 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 1), align 4
+  %b2 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 2), align 4
+  %b3 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 3), align 4
+  %c0 = load float, ptr @srcC32, align 4
+  %c1 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 1), align 4
+  %c2 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 2), align 4
+  %c3 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 3), align 4
   %fma0 = call float @llvm.fma.f32(float %a0, float %b0, float %c0)
   %fma1 = call float @llvm.fma.f32(float %a1, float %b1, float %c1)
   %fma2 = call float @llvm.fma.f32(float %a2, float %b2, float %c2)
   %fma3 = call float @llvm.fma.f32(float %a3, float %b3, float %c3)
-  store float %fma0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-  store float %fma1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %fma2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-  store float %fma3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %fma0, ptr @dst32, align 4
+  store float %fma1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %fma2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+  store float %fma3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @fma_8f32() #0 {
 ; NO-FMA-LABEL: @fma_8f32(
-; NO-FMA-NEXT:    [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 0), align 4
-; NO-FMA-NEXT:    [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 1), align 4
-; NO-FMA-NEXT:    [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 2), align 4
-; NO-FMA-NEXT:    [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 3), align 4
-; NO-FMA-NEXT:    [[A4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 4), align 4
-; NO-FMA-NEXT:    [[A5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 5), align 4
-; NO-FMA-NEXT:    [[A6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 6), align 4
-; NO-FMA-NEXT:    [[A7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 7), align 4
-; NO-FMA-NEXT:    [[B0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 0), align 4
-; NO-FMA-NEXT:    [[B1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 1), align 4
-; NO-FMA-NEXT:    [[B2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 2), align 4
-; NO-FMA-NEXT:    [[B3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 3), align 4
-; NO-FMA-NEXT:    [[B4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 4), align 4
-; NO-FMA-NEXT:    [[B5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 5), align 4
-; NO-FMA-NEXT:    [[B6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 6), align 4
-; NO-FMA-NEXT:    [[B7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 7), align 4
-; NO-FMA-NEXT:    [[C0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 0), align 4
-; NO-FMA-NEXT:    [[C1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 1), align 4
-; NO-FMA-NEXT:    [[C2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 2), align 4
-; NO-FMA-NEXT:    [[C3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 3), align 4
-; NO-FMA-NEXT:    [[C4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 4), align 4
-; NO-FMA-NEXT:    [[C5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 5), align 4
-; NO-FMA-NEXT:    [[C6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 6), align 4
-; NO-FMA-NEXT:    [[C7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 7), align 4
+; NO-FMA-NEXT:    [[A0:%.*]] = load float, ptr @srcA32, align 4
+; NO-FMA-NEXT:    [[A1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 1), align 4
+; NO-FMA-NEXT:    [[A2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 2), align 4
+; NO-FMA-NEXT:    [[A3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 3), align 4
+; NO-FMA-NEXT:    [[A4:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 4), align 4
+; NO-FMA-NEXT:    [[A5:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 5), align 4
+; NO-FMA-NEXT:    [[A6:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 6), align 4
+; NO-FMA-NEXT:    [[A7:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 7), align 4
+; NO-FMA-NEXT:    [[B0:%.*]] = load float, ptr @srcB32, align 4
+; NO-FMA-NEXT:    [[B1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 1), align 4
+; NO-FMA-NEXT:    [[B2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 2), align 4
+; NO-FMA-NEXT:    [[B3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 3), align 4
+; NO-FMA-NEXT:    [[B4:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 4), align 4
+; NO-FMA-NEXT:    [[B5:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 5), align 4
+; NO-FMA-NEXT:    [[B6:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 6), align 4
+; NO-FMA-NEXT:    [[B7:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 7), align 4
+; NO-FMA-NEXT:    [[C0:%.*]] = load float, ptr @srcC32, align 4
+; NO-FMA-NEXT:    [[C1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 1), align 4
+; NO-FMA-NEXT:    [[C2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 2), align 4
+; NO-FMA-NEXT:    [[C3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 3), align 4
+; NO-FMA-NEXT:    [[C4:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 4), align 4
+; NO-FMA-NEXT:    [[C5:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 5), align 4
+; NO-FMA-NEXT:    [[C6:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 6), align 4
+; NO-FMA-NEXT:    [[C7:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 7), align 4
 ; NO-FMA-NEXT:    [[FMA0:%.*]] = call float @llvm.fma.f32(float [[A0]], float [[B0]], float [[C0]])
 ; NO-FMA-NEXT:    [[FMA1:%.*]] = call float @llvm.fma.f32(float [[A1]], float [[B1]], float [[C1]])
 ; NO-FMA-NEXT:    [[FMA2:%.*]] = call float @llvm.fma.f32(float [[A2]], float [[B2]], float [[C2]])
@@ -311,48 +311,48 @@ define void @fma_8f32() #0 {
 ; NO-FMA-NEXT:    [[FMA5:%.*]] = call float @llvm.fma.f32(float [[A5]], float [[B5]], float [[C5]])
 ; NO-FMA-NEXT:    [[FMA6:%.*]] = call float @llvm.fma.f32(float [[A6]], float [[B6]], float [[C6]])
 ; NO-FMA-NEXT:    [[FMA7:%.*]] = call float @llvm.fma.f32(float [[A7]], float [[B7]], float [[C7]])
-; NO-FMA-NEXT:    store float [[FMA0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-; NO-FMA-NEXT:    store float [[FMA1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-; NO-FMA-NEXT:    store float [[FMA2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-; NO-FMA-NEXT:    store float [[FMA3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-; NO-FMA-NEXT:    store float [[FMA4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
-; NO-FMA-NEXT:    store float [[FMA5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-; NO-FMA-NEXT:    store float [[FMA6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
-; NO-FMA-NEXT:    store float [[FMA7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+; NO-FMA-NEXT:    store float [[FMA0]], ptr @dst32, align 4
+; NO-FMA-NEXT:    store float [[FMA1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+; NO-FMA-NEXT:    store float [[FMA2]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+; NO-FMA-NEXT:    store float [[FMA3]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+; NO-FMA-NEXT:    store float [[FMA4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+; NO-FMA-NEXT:    store float [[FMA5]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+; NO-FMA-NEXT:    store float [[FMA6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 4
+; NO-FMA-NEXT:    store float [[FMA7]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
 ; NO-FMA-NEXT:    ret void
 ;
 ; FMA-LABEL: @fma_8f32(
-; FMA-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcA32 to <8 x float>*), align 4
-; FMA-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcB32 to <8 x float>*), align 4
-; FMA-NEXT:    [[TMP3:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcC32 to <8 x float>*), align 4
+; FMA-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @srcA32, align 4
+; FMA-NEXT:    [[TMP2:%.*]] = load <8 x float>, ptr @srcB32, align 4
+; FMA-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr @srcC32, align 4
 ; FMA-NEXT:    [[TMP4:%.*]] = call <8 x float> @llvm.fma.v8f32(<8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x float> [[TMP3]])
-; FMA-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; FMA-NEXT:    store <8 x float> [[TMP4]], ptr @dst32, align 4
 ; FMA-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 3), align 4
-  %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 4), align 4
-  %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 5), align 4
-  %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 6), align 4
-  %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 7), align 4
-  %b0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 0), align 4
-  %b1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 1), align 4
-  %b2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 2), align 4
-  %b3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 3), align 4
-  %b4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 4), align 4
-  %b5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 5), align 4
-  %b6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 6), align 4
-  %b7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 7), align 4
-  %c0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 0), align 4
-  %c1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 1), align 4
-  %c2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 2), align 4
-  %c3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 3), align 4
-  %c4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 4), align 4
-  %c5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 5), align 4
-  %c6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 6), align 4
-  %c7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 7), align 4
+  %a0 = load float, ptr @srcA32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 3), align 4
+  %a4 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 4), align 4
+  %a5 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 5), align 4
+  %a6 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 6), align 4
+  %a7 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 7), align 4
+  %b0 = load float, ptr @srcB32, align 4
+  %b1 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 1), align 4
+  %b2 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 2), align 4
+  %b3 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 3), align 4
+  %b4 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 4), align 4
+  %b5 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 5), align 4
+  %b6 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 6), align 4
+  %b7 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 7), align 4
+  %c0 = load float, ptr @srcC32, align 4
+  %c1 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 1), align 4
+  %c2 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 2), align 4
+  %c3 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 3), align 4
+  %c4 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 4), align 4
+  %c5 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 5), align 4
+  %c6 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 6), align 4
+  %c7 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 7), align 4
   %fma0 = call float @llvm.fma.f32(float %a0, float %b0, float %c0)
   %fma1 = call float @llvm.fma.f32(float %a1, float %b1, float %c1)
   %fma2 = call float @llvm.fma.f32(float %a2, float %b2, float %c2)
@@ -361,67 +361,67 @@ define void @fma_8f32() #0 {
   %fma5 = call float @llvm.fma.f32(float %a5, float %b5, float %c5)
   %fma6 = call float @llvm.fma.f32(float %a6, float %b6, float %c6)
   %fma7 = call float @llvm.fma.f32(float %a7, float %b7, float %c7)
-  store float %fma0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-  store float %fma1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %fma2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-  store float %fma3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-  store float %fma4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
-  store float %fma5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-  store float %fma6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
-  store float %fma7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  store float %fma0, ptr @dst32, align 4
+  store float %fma1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %fma2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+  store float %fma3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+  store float %fma4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+  store float %fma5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+  store float %fma6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 4
+  store float %fma7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @fma_16f32() #0 {
 ; NO-FMA-LABEL: @fma_16f32(
-; NO-FMA-NEXT:    [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 0), align 4
-; NO-FMA-NEXT:    [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 1), align 4
-; NO-FMA-NEXT:    [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 2), align 4
-; NO-FMA-NEXT:    [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 3), align 4
-; NO-FMA-NEXT:    [[A4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 4), align 4
-; NO-FMA-NEXT:    [[A5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 5), align 4
-; NO-FMA-NEXT:    [[A6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 6), align 4
-; NO-FMA-NEXT:    [[A7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 7), align 4
-; NO-FMA-NEXT:    [[A8:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 8), align 4
-; NO-FMA-NEXT:    [[A9:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 9), align 4
-; NO-FMA-NEXT:    [[A10:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 10), align 4
-; NO-FMA-NEXT:    [[A11:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 11), align 4
-; NO-FMA-NEXT:    [[A12:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 12), align 4
-; NO-FMA-NEXT:    [[A13:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 13), align 4
-; NO-FMA-NEXT:    [[A14:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 14), align 4
-; NO-FMA-NEXT:    [[A15:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 15), align 4
-; NO-FMA-NEXT:    [[B0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 0), align 4
-; NO-FMA-NEXT:    [[B1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 1), align 4
-; NO-FMA-NEXT:    [[B2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 2), align 4
-; NO-FMA-NEXT:    [[B3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 3), align 4
-; NO-FMA-NEXT:    [[B4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 4), align 4
-; NO-FMA-NEXT:    [[B5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 5), align 4
-; NO-FMA-NEXT:    [[B6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 6), align 4
-; NO-FMA-NEXT:    [[B7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 7), align 4
-; NO-FMA-NEXT:    [[B8:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 8), align 4
-; NO-FMA-NEXT:    [[B9:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 9), align 4
-; NO-FMA-NEXT:    [[B10:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 10), align 4
-; NO-FMA-NEXT:    [[B11:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 11), align 4
-; NO-FMA-NEXT:    [[B12:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 12), align 4
-; NO-FMA-NEXT:    [[B13:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 13), align 4
-; NO-FMA-NEXT:    [[B14:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 14), align 4
-; NO-FMA-NEXT:    [[B15:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 15), align 4
-; NO-FMA-NEXT:    [[C0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 0), align 4
-; NO-FMA-NEXT:    [[C1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 1), align 4
-; NO-FMA-NEXT:    [[C2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 2), align 4
-; NO-FMA-NEXT:    [[C3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 3), align 4
-; NO-FMA-NEXT:    [[C4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 4), align 4
-; NO-FMA-NEXT:    [[C5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 5), align 4
-; NO-FMA-NEXT:    [[C6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 6), align 4
-; NO-FMA-NEXT:    [[C7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 7), align 4
-; NO-FMA-NEXT:    [[C8:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 8), align 4
-; NO-FMA-NEXT:    [[C9:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 9), align 4
-; NO-FMA-NEXT:    [[C10:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 10), align 4
-; NO-FMA-NEXT:    [[C11:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 11), align 4
-; NO-FMA-NEXT:    [[C12:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 12), align 4
-; NO-FMA-NEXT:    [[C13:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 13), align 4
-; NO-FMA-NEXT:    [[C14:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 14), align 4
-; NO-FMA-NEXT:    [[C15:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 15), align 4
+; NO-FMA-NEXT:    [[A0:%.*]] = load float, ptr @srcA32, align 4
+; NO-FMA-NEXT:    [[A1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 1), align 4
+; NO-FMA-NEXT:    [[A2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 2), align 4
+; NO-FMA-NEXT:    [[A3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 3), align 4
+; NO-FMA-NEXT:    [[A4:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 4), align 4
+; NO-FMA-NEXT:    [[A5:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 5), align 4
+; NO-FMA-NEXT:    [[A6:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 6), align 4
+; NO-FMA-NEXT:    [[A7:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 7), align 4
+; NO-FMA-NEXT:    [[A8:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 8), align 4
+; NO-FMA-NEXT:    [[A9:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 9), align 4
+; NO-FMA-NEXT:    [[A10:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 10), align 4
+; NO-FMA-NEXT:    [[A11:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 11), align 4
+; NO-FMA-NEXT:    [[A12:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 12), align 4
+; NO-FMA-NEXT:    [[A13:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 13), align 4
+; NO-FMA-NEXT:    [[A14:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 14), align 4
+; NO-FMA-NEXT:    [[A15:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 15), align 4
+; NO-FMA-NEXT:    [[B0:%.*]] = load float, ptr @srcB32, align 4
+; NO-FMA-NEXT:    [[B1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 1), align 4
+; NO-FMA-NEXT:    [[B2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 2), align 4
+; NO-FMA-NEXT:    [[B3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 3), align 4
+; NO-FMA-NEXT:    [[B4:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 4), align 4
+; NO-FMA-NEXT:    [[B5:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 5), align 4
+; NO-FMA-NEXT:    [[B6:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 6), align 4
+; NO-FMA-NEXT:    [[B7:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 7), align 4
+; NO-FMA-NEXT:    [[B8:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 8), align 4
+; NO-FMA-NEXT:    [[B9:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 9), align 4
+; NO-FMA-NEXT:    [[B10:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 10), align 4
+; NO-FMA-NEXT:    [[B11:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 11), align 4
+; NO-FMA-NEXT:    [[B12:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 12), align 4
+; NO-FMA-NEXT:    [[B13:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 13), align 4
+; NO-FMA-NEXT:    [[B14:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 14), align 4
+; NO-FMA-NEXT:    [[B15:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 15), align 4
+; NO-FMA-NEXT:    [[C0:%.*]] = load float, ptr @srcC32, align 4
+; NO-FMA-NEXT:    [[C1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 1), align 4
+; NO-FMA-NEXT:    [[C2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 2), align 4
+; NO-FMA-NEXT:    [[C3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 3), align 4
+; NO-FMA-NEXT:    [[C4:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 4), align 4
+; NO-FMA-NEXT:    [[C5:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 5), align 4
+; NO-FMA-NEXT:    [[C6:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 6), align 4
+; NO-FMA-NEXT:    [[C7:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 7), align 4
+; NO-FMA-NEXT:    [[C8:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 8), align 4
+; NO-FMA-NEXT:    [[C9:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 9), align 4
+; NO-FMA-NEXT:    [[C10:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 10), align 4
+; NO-FMA-NEXT:    [[C11:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 11), align 4
+; NO-FMA-NEXT:    [[C12:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 12), align 4
+; NO-FMA-NEXT:    [[C13:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 13), align 4
+; NO-FMA-NEXT:    [[C14:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 14), align 4
+; NO-FMA-NEXT:    [[C15:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 15), align 4
 ; NO-FMA-NEXT:    [[FMA0:%.*]] = call float @llvm.fma.f32(float [[A0]], float [[B0]], float [[C0]])
 ; NO-FMA-NEXT:    [[FMA1:%.*]] = call float @llvm.fma.f32(float [[A1]], float [[B1]], float [[C1]])
 ; NO-FMA-NEXT:    [[FMA2:%.*]] = call float @llvm.fma.f32(float [[A2]], float [[B2]], float [[C2]])
@@ -438,93 +438,93 @@ define void @fma_16f32() #0 {
 ; NO-FMA-NEXT:    [[FMA13:%.*]] = call float @llvm.fma.f32(float [[A13]], float [[B13]], float [[C13]])
 ; NO-FMA-NEXT:    [[FMA14:%.*]] = call float @llvm.fma.f32(float [[A14]], float [[B14]], float [[C14]])
 ; NO-FMA-NEXT:    [[FMA15:%.*]] = call float @llvm.fma.f32(float [[A15]], float [[B15]], float [[C15]])
-; NO-FMA-NEXT:    store float [[FMA0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-; NO-FMA-NEXT:    store float [[FMA1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-; NO-FMA-NEXT:    store float [[FMA2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-; NO-FMA-NEXT:    store float [[FMA3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-; NO-FMA-NEXT:    store float [[FMA4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
-; NO-FMA-NEXT:    store float [[FMA5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-; NO-FMA-NEXT:    store float [[FMA6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
-; NO-FMA-NEXT:    store float [[FMA7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
-; NO-FMA-NEXT:    store float [[FMA8]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8), align 4
-; NO-FMA-NEXT:    store float [[FMA9]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9), align 4
-; NO-FMA-NEXT:    store float [[FMA10]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
-; NO-FMA-NEXT:    store float [[FMA11]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-; NO-FMA-NEXT:    store float [[FMA12]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
-; NO-FMA-NEXT:    store float [[FMA13]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-; NO-FMA-NEXT:    store float [[FMA14]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
-; NO-FMA-NEXT:    store float [[FMA15]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+; NO-FMA-NEXT:    store float [[FMA0]], ptr @dst32, align 4
+; NO-FMA-NEXT:    store float [[FMA1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+; NO-FMA-NEXT:    store float [[FMA2]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+; NO-FMA-NEXT:    store float [[FMA3]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+; NO-FMA-NEXT:    store float [[FMA4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+; NO-FMA-NEXT:    store float [[FMA5]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+; NO-FMA-NEXT:    store float [[FMA6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 4
+; NO-FMA-NEXT:    store float [[FMA7]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
+; NO-FMA-NEXT:    store float [[FMA8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
+; NO-FMA-NEXT:    store float [[FMA9]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9), align 4
+; NO-FMA-NEXT:    store float [[FMA10]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 4
+; NO-FMA-NEXT:    store float [[FMA11]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+; NO-FMA-NEXT:    store float [[FMA12]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
+; NO-FMA-NEXT:    store float [[FMA13]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+; NO-FMA-NEXT:    store float [[FMA14]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 4
+; NO-FMA-NEXT:    store float [[FMA15]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
 ; NO-FMA-NEXT:    ret void
 ;
 ; FMA256-LABEL: @fma_16f32(
-; FMA256-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcA32 to <8 x float>*), align 4
-; FMA256-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcB32 to <8 x float>*), align 4
-; FMA256-NEXT:    [[TMP3:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcC32 to <8 x float>*), align 4
+; FMA256-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @srcA32, align 4
+; FMA256-NEXT:    [[TMP2:%.*]] = load <8 x float>, ptr @srcB32, align 4
+; FMA256-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr @srcC32, align 4
 ; FMA256-NEXT:    [[TMP4:%.*]] = call <8 x float> @llvm.fma.v8f32(<8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x float> [[TMP3]])
-; FMA256-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
-; FMA256-NEXT:    [[TMP5:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 8) to <8 x float>*), align 4
-; FMA256-NEXT:    [[TMP6:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 8) to <8 x float>*), align 4
-; FMA256-NEXT:    [[TMP7:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 8) to <8 x float>*), align 4
+; FMA256-NEXT:    store <8 x float> [[TMP4]], ptr @dst32, align 4
+; FMA256-NEXT:    [[TMP5:%.*]] = load <8 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 8), align 4
+; FMA256-NEXT:    [[TMP6:%.*]] = load <8 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 8), align 4
+; FMA256-NEXT:    [[TMP7:%.*]] = load <8 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 8), align 4
 ; FMA256-NEXT:    [[TMP8:%.*]] = call <8 x float> @llvm.fma.v8f32(<8 x float> [[TMP5]], <8 x float> [[TMP6]], <8 x float> [[TMP7]])
-; FMA256-NEXT:    store <8 x float> [[TMP8]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; FMA256-NEXT:    store <8 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
 ; FMA256-NEXT:    ret void
 ;
 ; FMA512-LABEL: @fma_16f32(
-; FMA512-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @srcA32 to <16 x float>*), align 4
-; FMA512-NEXT:    [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @srcB32 to <16 x float>*), align 4
-; FMA512-NEXT:    [[TMP3:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @srcC32 to <16 x float>*), align 4
+; FMA512-NEXT:    [[TMP1:%.*]] = load <16 x float>, ptr @srcA32, align 4
+; FMA512-NEXT:    [[TMP2:%.*]] = load <16 x float>, ptr @srcB32, align 4
+; FMA512-NEXT:    [[TMP3:%.*]] = load <16 x float>, ptr @srcC32, align 4
 ; FMA512-NEXT:    [[TMP4:%.*]] = call <16 x float> @llvm.fma.v16f32(<16 x float> [[TMP1]], <16 x float> [[TMP2]], <16 x float> [[TMP3]])
-; FMA512-NEXT:    store <16 x float> [[TMP4]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 4
+; FMA512-NEXT:    store <16 x float> [[TMP4]], ptr @dst32, align 4
 ; FMA512-NEXT:    ret void
 ;
-  %a0  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  0), align 4
-  %a1  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  1), align 4
-  %a2  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  2), align 4
-  %a3  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  3), align 4
-  %a4  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  4), align 4
-  %a5  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  5), align 4
-  %a6  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  6), align 4
-  %a7  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  7), align 4
-  %a8  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  8), align 4
-  %a9  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  9), align 4
-  %a10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 10), align 4
-  %a11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 11), align 4
-  %a12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 12), align 4
-  %a13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 13), align 4
-  %a14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 14), align 4
-  %a15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 15), align 4
-  %b0  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  0), align 4
-  %b1  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  1), align 4
-  %b2  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  2), align 4
-  %b3  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  3), align 4
-  %b4  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  4), align 4
-  %b5  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  5), align 4
-  %b6  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  6), align 4
-  %b7  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  7), align 4
-  %b8  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  8), align 4
-  %b9  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  9), align 4
-  %b10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 10), align 4
-  %b11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 11), align 4
-  %b12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 12), align 4
-  %b13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 13), align 4
-  %b14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 14), align 4
-  %b15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 15), align 4
-  %c0  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64  0), align 4
-  %c1  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64  1), align 4
-  %c2  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64  2), align 4
-  %c3  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64  3), align 4
-  %c4  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64  4), align 4
-  %c5  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64  5), align 4
-  %c6  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64  6), align 4
-  %c7  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64  7), align 4
-  %c8  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64  8), align 4
-  %c9  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64  9), align 4
-  %c10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 10), align 4
-  %c11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 11), align 4
-  %c12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 12), align 4
-  %c13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 13), align 4
-  %c14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 14), align 4
-  %c15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 15), align 4
+  %a0  = load float, ptr @srcA32, align 4
+  %a1  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  1), align 4
+  %a2  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  2), align 4
+  %a3  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  3), align 4
+  %a4  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  4), align 4
+  %a5  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  5), align 4
+  %a6  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  6), align 4
+  %a7  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  7), align 4
+  %a8  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  8), align 4
+  %a9  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  9), align 4
+  %a10 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 10), align 4
+  %a11 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 11), align 4
+  %a12 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 12), align 4
+  %a13 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 13), align 4
+  %a14 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 14), align 4
+  %a15 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 15), align 4
+  %b0  = load float, ptr @srcB32, align 4
+  %b1  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  1), align 4
+  %b2  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  2), align 4
+  %b3  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  3), align 4
+  %b4  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  4), align 4
+  %b5  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  5), align 4
+  %b6  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  6), align 4
+  %b7  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  7), align 4
+  %b8  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  8), align 4
+  %b9  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  9), align 4
+  %b10 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 10), align 4
+  %b11 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 11), align 4
+  %b12 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 12), align 4
+  %b13 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 13), align 4
+  %b14 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 14), align 4
+  %b15 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 15), align 4
+  %c0  = load float, ptr @srcC32, align 4
+  %c1  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64  1), align 4
+  %c2  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64  2), align 4
+  %c3  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64  3), align 4
+  %c4  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64  4), align 4
+  %c5  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64  5), align 4
+  %c6  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64  6), align 4
+  %c7  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64  7), align 4
+  %c8  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64  8), align 4
+  %c9  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64  9), align 4
+  %c10 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 10), align 4
+  %c11 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 11), align 4
+  %c12 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 12), align 4
+  %c13 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 13), align 4
+  %c14 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 14), align 4
+  %c15 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 15), align 4
   %fma0  = call float @llvm.fma.f32(float %a0 , float %b0 , float %c0 )
   %fma1  = call float @llvm.fma.f32(float %a1 , float %b1 , float %c1 )
   %fma2  = call float @llvm.fma.f32(float %a2 , float %b2 , float %c2 )
@@ -541,22 +541,22 @@ define void @fma_16f32() #0 {
   %fma13 = call float @llvm.fma.f32(float %a13, float %b13, float %c13)
   %fma14 = call float @llvm.fma.f32(float %a14, float %b14, float %c14)
   %fma15 = call float @llvm.fma.f32(float %a15, float %b15, float %c15)
-  store float %fma0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  0), align 4
-  store float %fma1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  1), align 4
-  store float %fma2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  2), align 4
-  store float %fma3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  3), align 4
-  store float %fma4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  4), align 4
-  store float %fma5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  5), align 4
-  store float %fma6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  6), align 4
-  store float %fma7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  7), align 4
-  store float %fma8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  8), align 4
-  store float %fma9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  9), align 4
-  store float %fma10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
-  store float %fma11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-  store float %fma12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
-  store float %fma13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-  store float %fma14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
-  store float %fma15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+  store float %fma0 , ptr @dst32, align 4
+  store float %fma1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  1), align 4
+  store float %fma2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  2), align 4
+  store float %fma3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  3), align 4
+  store float %fma4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  4), align 4
+  store float %fma5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  5), align 4
+  store float %fma6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  6), align 4
+  store float %fma7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  7), align 4
+  store float %fma8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  8), align 4
+  store float %fma9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  9), align 4
+  store float %fma10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 4
+  store float %fma11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+  store float %fma12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
+  store float %fma13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+  store float %fma14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 4
+  store float %fma15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
   ret void
 }
 

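The rewrite in every hunk above follows the same mechanical pattern: loads and stores through typed pointers (float*, <8 x float>*, and so on) become loads and stores through the single opaque ptr type, bitcast constant expressions between pointer types disappear entirely, and a getelementptr whose indices are all zero folds away so the global is referenced directly (GEPs with a non-zero index keep their source element type but take a ptr operand). A minimal before/after sketch distilled from the hunks above, for illustration only and not an additional test:

  ; Typed pointers (before): explicit pointee types, zero-index GEPs, and
  ; bitcasts to the vector pointer type.
  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 0), align 4
  %v0 = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcA32 to <8 x float>*), align 4

  ; Opaque pointers (after): the all-zero GEP and the bitcast both fold away,
  ; leaving a direct reference to the global through ptr.
  %a0 = load float, ptr @srcA32, align 4
  %v0 = load <8 x float>, ptr @srcA32, align 4
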
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll b/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll
index 31c473ca7e510..a42567c5e2e46 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll
@@ -24,115 +24,115 @@ declare double @llvm.maxnum.f64(double, double)
 
 define void @fmaxnum_2f64() #0 {
 ; CHECK-LABEL: @fmaxnum_2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcA64 to <2 x double>*), align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcB64 to <2 x double>*), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr @dst64, align 8
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 1), align 8
-  %b0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 0), align 8
-  %b1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 1), align 8
+  %a0 = load double, ptr @srcA64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8
+  %b0 = load double, ptr @srcB64, align 8
+  %b1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 8
   %fmaxnum0 = call double @llvm.maxnum.f64(double %a0, double %b0)
   %fmaxnum1 = call double @llvm.maxnum.f64(double %a1, double %b1)
-  store double %fmaxnum0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %fmaxnum1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %fmaxnum0, ptr @dst64, align 8
+  store double %fmaxnum1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @fmaxnum_4f64() #0 {
 ; SSE-LABEL: @fmaxnum_4f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcA64 to <2 x double>*), align 8
-; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcB64 to <2 x double>*), align 8
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 8
+; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 8
 ; SSE-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; SSE-NEXT:    store <2 x double> [[TMP3]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
-; SSE-NEXT:    [[TMP4:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 2) to <2 x double>*), align 8
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE-NEXT:    store <2 x double> [[TMP3]], ptr @dst64, align 8
+; SSE-NEXT:    [[TMP4:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP6:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]])
-; SSE-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @fmaxnum_4f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcA64 to <4 x double>*), align 8
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcB64 to <4 x double>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @srcA64, align 8
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr @srcB64, align 8
 ; AVX-NEXT:    [[TMP3:%.*]] = call <4 x double> @llvm.maxnum.v4f64(<4 x double> [[TMP1]], <4 x double> [[TMP2]])
-; AVX-NEXT:    store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX-NEXT:    store <4 x double> [[TMP3]], ptr @dst64, align 8
 ; AVX-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 1), align 8
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 2), align 8
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 3), align 8
-  %b0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 0), align 8
-  %b1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 1), align 8
-  %b2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 2), align 8
-  %b3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 3), align 8
+  %a0 = load double, ptr @srcA64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 8
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 3), align 8
+  %b0 = load double, ptr @srcB64, align 8
+  %b1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 8
+  %b2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 8
+  %b3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 3), align 8
   %fmaxnum0 = call double @llvm.maxnum.f64(double %a0, double %b0)
   %fmaxnum1 = call double @llvm.maxnum.f64(double %a1, double %b1)
   %fmaxnum2 = call double @llvm.maxnum.f64(double %a2, double %b2)
   %fmaxnum3 = call double @llvm.maxnum.f64(double %a3, double %b3)
-  store double %fmaxnum0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %fmaxnum1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %fmaxnum2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-  store double %fmaxnum3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %fmaxnum0, ptr @dst64, align 8
+  store double %fmaxnum1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %fmaxnum2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+  store double %fmaxnum3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
   ret void
 }
 
 define void @fmaxnum_8f64() #0 {
 ; SSE-LABEL: @fmaxnum_8f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcA64 to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcB64 to <2 x double>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; SSE-NEXT:    store <2 x double> [[TMP3]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 2) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 2) to <2 x double>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP3]], ptr @dst64, align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]])
-; SSE-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 4) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 4) to <2 x double>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP8:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP9:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP7]], <2 x double> [[TMP8]])
-; SSE-NEXT:    store <2 x double> [[TMP9]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP10:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 6) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP11:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 6) to <2 x double>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP9]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP10:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 6), align 4
+; SSE-NEXT:    [[TMP11:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 6), align 4
 ; SSE-NEXT:    [[TMP12:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP10]], <2 x double> [[TMP11]])
-; SSE-NEXT:    store <2 x double> [[TMP12]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP12]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @fmaxnum_8f64(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcA64 to <4 x double>*), align 4
-; AVX256-NEXT:    [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcB64 to <4 x double>*), align 4
+; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @srcA64, align 4
+; AVX256-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr @srcB64, align 4
 ; AVX256-NEXT:    [[TMP3:%.*]] = call <4 x double> @llvm.maxnum.v4f64(<4 x double> [[TMP1]], <4 x double> [[TMP2]])
-; AVX256-NEXT:    store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 4
-; AVX256-NEXT:    [[TMP4:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 4) to <4 x double>*), align 4
-; AVX256-NEXT:    [[TMP5:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 4) to <4 x double>*), align 4
+; AVX256-NEXT:    store <4 x double> [[TMP3]], ptr @dst64, align 4
+; AVX256-NEXT:    [[TMP4:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 4), align 4
+; AVX256-NEXT:    [[TMP5:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 4), align 4
 ; AVX256-NEXT:    [[TMP6:%.*]] = call <4 x double> @llvm.maxnum.v4f64(<4 x double> [[TMP4]], <4 x double> [[TMP5]])
-; AVX256-NEXT:    store <4 x double> [[TMP6]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 4
+; AVX256-NEXT:    store <4 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @fmaxnum_8f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @srcA64 to <8 x double>*), align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @srcB64 to <8 x double>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @srcA64, align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x double>, ptr @srcB64, align 4
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <8 x double> @llvm.maxnum.v8f64(<8 x double> [[TMP1]], <8 x double> [[TMP2]])
-; AVX512-NEXT:    store <8 x double> [[TMP3]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 4
+; AVX512-NEXT:    store <8 x double> [[TMP3]], ptr @dst64, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 4
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 1), align 4
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 2), align 4
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 3), align 4
-  %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 4), align 4
-  %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 5), align 4
-  %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 6), align 4
-  %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 7), align 4
-  %b0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 0), align 4
-  %b1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 1), align 4
-  %b2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 2), align 4
-  %b3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 3), align 4
-  %b4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 4), align 4
-  %b5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 5), align 4
-  %b6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 6), align 4
-  %b7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 7), align 4
+  %a0 = load double, ptr @srcA64, align 4
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 4
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 4
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 3), align 4
+  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 4), align 4
+  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 5), align 4
+  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 6), align 4
+  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 7), align 4
+  %b0 = load double, ptr @srcB64, align 4
+  %b1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 4
+  %b2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 4
+  %b3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 3), align 4
+  %b4 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 4), align 4
+  %b5 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 5), align 4
+  %b6 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 6), align 4
+  %b7 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 7), align 4
   %fmaxnum0 = call double @llvm.maxnum.f64(double %a0, double %b0)
   %fmaxnum1 = call double @llvm.maxnum.f64(double %a1, double %b1)
   %fmaxnum2 = call double @llvm.maxnum.f64(double %a2, double %b2)
@@ -141,79 +141,79 @@ define void @fmaxnum_8f64() #0 {
   %fmaxnum5 = call double @llvm.maxnum.f64(double %a5, double %b5)
   %fmaxnum6 = call double @llvm.maxnum.f64(double %a6, double %b6)
   %fmaxnum7 = call double @llvm.maxnum.f64(double %a7, double %b7)
-  store double %fmaxnum0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 4
-  store double %fmaxnum1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 4
-  store double %fmaxnum2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 4
-  store double %fmaxnum3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 4
-  store double %fmaxnum4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 4
-  store double %fmaxnum5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 4
-  store double %fmaxnum6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 4
-  store double %fmaxnum7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 4
+  store double %fmaxnum0, ptr @dst64, align 4
+  store double %fmaxnum1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 4
+  store double %fmaxnum2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 4
+  store double %fmaxnum3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 4
+  store double %fmaxnum4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
+  store double %fmaxnum5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 4
+  store double %fmaxnum6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 4
+  store double %fmaxnum7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 4
   ret void
 }
 
 define void @fmaxnum_4f32() #0 {
 ; CHECK-LABEL: @fmaxnum_4f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcA32 to <4 x float>*), align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcB32 to <4 x float>*), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @srcA32, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @srcB32, align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT:    store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr @dst32, align 4
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 3), align 4
-  %b0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 0), align 4
-  %b1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 1), align 4
-  %b2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 2), align 4
-  %b3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 3), align 4
+  %a0 = load float, ptr @srcA32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 3), align 4
+  %b0 = load float, ptr @srcB32, align 4
+  %b1 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 1), align 4
+  %b2 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 2), align 4
+  %b3 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 3), align 4
   %fmaxnum0 = call float @llvm.maxnum.f32(float %a0, float %b0)
   %fmaxnum1 = call float @llvm.maxnum.f32(float %a1, float %b1)
   %fmaxnum2 = call float @llvm.maxnum.f32(float %a2, float %b2)
   %fmaxnum3 = call float @llvm.maxnum.f32(float %a3, float %b3)
-  store float %fmaxnum0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-  store float %fmaxnum1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %fmaxnum2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-  store float %fmaxnum3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %fmaxnum0, ptr @dst32, align 4
+  store float %fmaxnum1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %fmaxnum2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+  store float %fmaxnum3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @fmaxnum_8f32() #0 {
 ; SSE-LABEL: @fmaxnum_8f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcA32 to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcB32 to <4 x float>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @srcA32, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @srcB32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; SSE-NEXT:    store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 4) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP3]], ptr @dst32, align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]])
-; SSE-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @fmaxnum_8f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcA32 to <8 x float>*), align 4
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcB32 to <8 x float>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @srcA32, align 4
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x float>, ptr @srcB32, align 4
 ; AVX-NEXT:    [[TMP3:%.*]] = call <8 x float> @llvm.maxnum.v8f32(<8 x float> [[TMP1]], <8 x float> [[TMP2]])
-; AVX-NEXT:    store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX-NEXT:    store <8 x float> [[TMP3]], ptr @dst32, align 4
 ; AVX-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 3), align 4
-  %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 4), align 4
-  %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 5), align 4
-  %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 6), align 4
-  %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 7), align 4
-  %b0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 0), align 4
-  %b1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 1), align 4
-  %b2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 2), align 4
-  %b3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 3), align 4
-  %b4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 4), align 4
-  %b5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 5), align 4
-  %b6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 6), align 4
-  %b7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 7), align 4
+  %a0 = load float, ptr @srcA32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 3), align 4
+  %a4 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 4), align 4
+  %a5 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 5), align 4
+  %a6 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 6), align 4
+  %a7 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 7), align 4
+  %b0 = load float, ptr @srcB32, align 4
+  %b1 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 1), align 4
+  %b2 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 2), align 4
+  %b3 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 3), align 4
+  %b4 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 4), align 4
+  %b5 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 5), align 4
+  %b6 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 6), align 4
+  %b7 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 7), align 4
   %fmaxnum0 = call float @llvm.maxnum.f32(float %a0, float %b0)
   %fmaxnum1 = call float @llvm.maxnum.f32(float %a1, float %b1)
   %fmaxnum2 = call float @llvm.maxnum.f32(float %a2, float %b2)
@@ -222,87 +222,87 @@ define void @fmaxnum_8f32() #0 {
   %fmaxnum5 = call float @llvm.maxnum.f32(float %a5, float %b5)
   %fmaxnum6 = call float @llvm.maxnum.f32(float %a6, float %b6)
   %fmaxnum7 = call float @llvm.maxnum.f32(float %a7, float %b7)
-  store float %fmaxnum0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-  store float %fmaxnum1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %fmaxnum2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-  store float %fmaxnum3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-  store float %fmaxnum4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
-  store float %fmaxnum5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-  store float %fmaxnum6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
-  store float %fmaxnum7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  store float %fmaxnum0, ptr @dst32, align 4
+  store float %fmaxnum1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %fmaxnum2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+  store float %fmaxnum3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+  store float %fmaxnum4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+  store float %fmaxnum5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+  store float %fmaxnum6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 4
+  store float %fmaxnum7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @fmaxnum_16f32() #0 {
 ; SSE-LABEL: @fmaxnum_16f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcA32 to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcB32 to <4 x float>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @srcA32, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @srcB32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; SSE-NEXT:    store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 4) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP3]], ptr @dst32, align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]])
-; SSE-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 8) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 8) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP8:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP9:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP7]], <4 x float> [[TMP8]])
-; SSE-NEXT:    store <4 x float> [[TMP9]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP10:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 12) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP11:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP9]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP10:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 12), align 4
+; SSE-NEXT:    [[TMP11:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP12:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP10]], <4 x float> [[TMP11]])
-; SSE-NEXT:    store <4 x float> [[TMP12]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP12]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @fmaxnum_16f32(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcA32 to <8 x float>*), align 4
-; AVX256-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcB32 to <8 x float>*), align 4
+; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @srcA32, align 4
+; AVX256-NEXT:    [[TMP2:%.*]] = load <8 x float>, ptr @srcB32, align 4
 ; AVX256-NEXT:    [[TMP3:%.*]] = call <8 x float> @llvm.maxnum.v8f32(<8 x float> [[TMP1]], <8 x float> [[TMP2]])
-; AVX256-NEXT:    store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
-; AVX256-NEXT:    [[TMP4:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 8) to <8 x float>*), align 4
-; AVX256-NEXT:    [[TMP5:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX256-NEXT:    store <8 x float> [[TMP3]], ptr @dst32, align 4
+; AVX256-NEXT:    [[TMP4:%.*]] = load <8 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 8), align 4
+; AVX256-NEXT:    [[TMP5:%.*]] = load <8 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 8), align 4
 ; AVX256-NEXT:    [[TMP6:%.*]] = call <8 x float> @llvm.maxnum.v8f32(<8 x float> [[TMP4]], <8 x float> [[TMP5]])
-; AVX256-NEXT:    store <8 x float> [[TMP6]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX256-NEXT:    store <8 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @fmaxnum_16f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @srcA32 to <16 x float>*), align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @srcB32 to <16 x float>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, ptr @srcA32, align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x float>, ptr @srcB32, align 4
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <16 x float> @llvm.maxnum.v16f32(<16 x float> [[TMP1]], <16 x float> [[TMP2]])
-; AVX512-NEXT:    store <16 x float> [[TMP3]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 4
+; AVX512-NEXT:    store <16 x float> [[TMP3]], ptr @dst32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  0), align 4
-  %a1  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  1), align 4
-  %a2  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  2), align 4
-  %a3  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  3), align 4
-  %a4  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  4), align 4
-  %a5  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  5), align 4
-  %a6  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  6), align 4
-  %a7  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  7), align 4
-  %a8  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  8), align 4
-  %a9  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  9), align 4
-  %a10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 10), align 4
-  %a11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 11), align 4
-  %a12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 12), align 4
-  %a13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 13), align 4
-  %a14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 14), align 4
-  %a15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 15), align 4
-  %b0  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  0), align 4
-  %b1  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  1), align 4
-  %b2  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  2), align 4
-  %b3  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  3), align 4
-  %b4  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  4), align 4
-  %b5  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  5), align 4
-  %b6  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  6), align 4
-  %b7  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  7), align 4
-  %b8  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  8), align 4
-  %b9  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  9), align 4
-  %b10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 10), align 4
-  %b11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 11), align 4
-  %b12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 12), align 4
-  %b13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 13), align 4
-  %b14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 14), align 4
-  %b15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 15), align 4
+  %a0  = load float, ptr @srcA32, align 4
+  %a1  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  1), align 4
+  %a2  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  2), align 4
+  %a3  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  3), align 4
+  %a4  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  4), align 4
+  %a5  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  5), align 4
+  %a6  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  6), align 4
+  %a7  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  7), align 4
+  %a8  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  8), align 4
+  %a9  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  9), align 4
+  %a10 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 10), align 4
+  %a11 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 11), align 4
+  %a12 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 12), align 4
+  %a13 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 13), align 4
+  %a14 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 14), align 4
+  %a15 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 15), align 4
+  %b0  = load float, ptr @srcB32, align 4
+  %b1  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  1), align 4
+  %b2  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  2), align 4
+  %b3  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  3), align 4
+  %b4  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  4), align 4
+  %b5  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  5), align 4
+  %b6  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  6), align 4
+  %b7  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  7), align 4
+  %b8  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  8), align 4
+  %b9  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  9), align 4
+  %b10 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 10), align 4
+  %b11 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 11), align 4
+  %b12 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 12), align 4
+  %b13 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 13), align 4
+  %b14 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 14), align 4
+  %b15 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 15), align 4
   %fmaxnum0  = call float @llvm.maxnum.f32(float %a0 , float %b0 )
   %fmaxnum1  = call float @llvm.maxnum.f32(float %a1 , float %b1 )
   %fmaxnum2  = call float @llvm.maxnum.f32(float %a2 , float %b2 )
@@ -319,59 +319,57 @@ define void @fmaxnum_16f32() #0 {
   %fmaxnum13 = call float @llvm.maxnum.f32(float %a13, float %b13)
   %fmaxnum14 = call float @llvm.maxnum.f32(float %a14, float %b14)
   %fmaxnum15 = call float @llvm.maxnum.f32(float %a15, float %b15)
-  store float %fmaxnum0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  0), align 4
-  store float %fmaxnum1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  1), align 4
-  store float %fmaxnum2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  2), align 4
-  store float %fmaxnum3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  3), align 4
-  store float %fmaxnum4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  4), align 4
-  store float %fmaxnum5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  5), align 4
-  store float %fmaxnum6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  6), align 4
-  store float %fmaxnum7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  7), align 4
-  store float %fmaxnum8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  8), align 4
-  store float %fmaxnum9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  9), align 4
-  store float %fmaxnum10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
-  store float %fmaxnum11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-  store float %fmaxnum12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
-  store float %fmaxnum13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-  store float %fmaxnum14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
-  store float %fmaxnum15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+  store float %fmaxnum0 , ptr @dst32, align 4
+  store float %fmaxnum1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  1), align 4
+  store float %fmaxnum2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  2), align 4
+  store float %fmaxnum3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  3), align 4
+  store float %fmaxnum4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  4), align 4
+  store float %fmaxnum5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  5), align 4
+  store float %fmaxnum6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  6), align 4
+  store float %fmaxnum7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  7), align 4
+  store float %fmaxnum8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  8), align 4
+  store float %fmaxnum9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  9), align 4
+  store float %fmaxnum10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 4
+  store float %fmaxnum11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+  store float %fmaxnum12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
+  store float %fmaxnum13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+  store float %fmaxnum14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 4
+  store float %fmaxnum15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
   ret void
 }
 
-define float @reduction_v4f32_fast(float* %p) {
+define float @reduction_v4f32_fast(ptr %p) {
 ; CHECK-LABEL: @reduction_v4f32_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP2]])
 ; CHECK-NEXT:    ret float [[TMP3]]
 ;
-  %g1 = getelementptr inbounds float, float* %p, i64 1
-  %g2 = getelementptr inbounds float, float* %p, i64 2
-  %g3 = getelementptr inbounds float, float* %p, i64 3
-  %t0 = load float, float* %p, align 4
-  %t1 = load float, float* %g1, align 4
-  %t2 = load float, float* %g2, align 4
-  %t3 = load float, float* %g3, align 4
+  %g1 = getelementptr inbounds float, ptr %p, i64 1
+  %g2 = getelementptr inbounds float, ptr %p, i64 2
+  %g3 = getelementptr inbounds float, ptr %p, i64 3
+  %t0 = load float, ptr %p, align 4
+  %t1 = load float, ptr %g1, align 4
+  %t2 = load float, ptr %g2, align 4
+  %t3 = load float, ptr %g3, align 4
   %m1 = tail call fast float @llvm.maxnum.f32(float %t1, float %t0)
   %m2 = tail call fast float @llvm.maxnum.f32(float %t2, float %m1)
   %m3 = tail call fast float @llvm.maxnum.f32(float %t3, float %m2)
   ret float %m3
 }
 
-define float @reduction_v4f32_nnan(float* %p) {
+define float @reduction_v4f32_nnan(ptr %p) {
 ; CHECK-LABEL: @reduction_v4f32_nnan(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP2]])
 ; CHECK-NEXT:    ret float [[TMP3]]
 ;
-  %g1 = getelementptr inbounds float, float* %p, i64 1
-  %g2 = getelementptr inbounds float, float* %p, i64 2
-  %g3 = getelementptr inbounds float, float* %p, i64 3
-  %t0 = load float, float* %p, align 4
-  %t1 = load float, float* %g1, align 4
-  %t2 = load float, float* %g2, align 4
-  %t3 = load float, float* %g3, align 4
+  %g1 = getelementptr inbounds float, ptr %p, i64 1
+  %g2 = getelementptr inbounds float, ptr %p, i64 2
+  %g3 = getelementptr inbounds float, ptr %p, i64 3
+  %t0 = load float, ptr %p, align 4
+  %t1 = load float, ptr %g1, align 4
+  %t2 = load float, ptr %g2, align 4
+  %t3 = load float, ptr %g3, align 4
   %m1 = tail call nnan float @llvm.maxnum.f32(float %t1, float %t0)
   %m2 = tail call nnan float @llvm.maxnum.f32(float %t2, float %m1)
   %m3 = tail call nnan float @llvm.maxnum.f32(float %t3, float %m2)
@@ -380,55 +378,54 @@ define float @reduction_v4f32_nnan(float* %p) {
 
 ; Negative test - must have nnan.
 
-define float @reduction_v4f32_not_fast(float* %p) {
+define float @reduction_v4f32_not_fast(ptr %p) {
 ; CHECK-LABEL: @reduction_v4f32_not_fast(
-; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
-; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
-; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
-; CHECK-NEXT:    [[T0:%.*]] = load float, float* [[P]], align 4
-; CHECK-NEXT:    [[T1:%.*]] = load float, float* [[G1]], align 4
-; CHECK-NEXT:    [[T2:%.*]] = load float, float* [[G2]], align 4
-; CHECK-NEXT:    [[T3:%.*]] = load float, float* [[G3]], align 4
+; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds float, ptr [[P:%.*]], i64 1
+; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 2
+; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds float, ptr [[P]], i64 3
+; CHECK-NEXT:    [[T0:%.*]] = load float, ptr [[P]], align 4
+; CHECK-NEXT:    [[T1:%.*]] = load float, ptr [[G1]], align 4
+; CHECK-NEXT:    [[T2:%.*]] = load float, ptr [[G2]], align 4
+; CHECK-NEXT:    [[T3:%.*]] = load float, ptr [[G3]], align 4
 ; CHECK-NEXT:    [[M1:%.*]] = tail call float @llvm.maxnum.f32(float [[T1]], float [[T0]])
 ; CHECK-NEXT:    [[M2:%.*]] = tail call float @llvm.maxnum.f32(float [[T2]], float [[M1]])
 ; CHECK-NEXT:    [[M3:%.*]] = tail call float @llvm.maxnum.f32(float [[T3]], float [[M2]])
 ; CHECK-NEXT:    ret float [[M3]]
 ;
-  %g1 = getelementptr inbounds float, float* %p, i64 1
-  %g2 = getelementptr inbounds float, float* %p, i64 2
-  %g3 = getelementptr inbounds float, float* %p, i64 3
-  %t0 = load float, float* %p, align 4
-  %t1 = load float, float* %g1, align 4
-  %t2 = load float, float* %g2, align 4
-  %t3 = load float, float* %g3, align 4
+  %g1 = getelementptr inbounds float, ptr %p, i64 1
+  %g2 = getelementptr inbounds float, ptr %p, i64 2
+  %g3 = getelementptr inbounds float, ptr %p, i64 3
+  %t0 = load float, ptr %p, align 4
+  %t1 = load float, ptr %g1, align 4
+  %t2 = load float, ptr %g2, align 4
+  %t3 = load float, ptr %g3, align 4
   %m1 = tail call float @llvm.maxnum.f32(float %t1, float %t0)
   %m2 = tail call float @llvm.maxnum.f32(float %t2, float %m1)
   %m3 = tail call float @llvm.maxnum.f32(float %t3, float %m2)
   ret float %m3
 }
 
-define float @reduction_v8f32_fast(float* %p) {
+define float @reduction_v8f32_fast(ptr %p) {
 ; CHECK-LABEL: @reduction_v8f32_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <8 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x float>, ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> [[TMP2]])
 ; CHECK-NEXT:    ret float [[TMP3]]
 ;
-  %g1 = getelementptr inbounds float, float* %p, i64 1
-  %g2 = getelementptr inbounds float, float* %p, i64 2
-  %g3 = getelementptr inbounds float, float* %p, i64 3
-  %g4 = getelementptr inbounds float, float* %p, i64 4
-  %g5 = getelementptr inbounds float, float* %p, i64 5
-  %g6 = getelementptr inbounds float, float* %p, i64 6
-  %g7 = getelementptr inbounds float, float* %p, i64 7
-  %t0 = load float, float* %p, align 4
-  %t1 = load float, float* %g1, align 4
-  %t2 = load float, float* %g2, align 4
-  %t3 = load float, float* %g3, align 4
-  %t4 = load float, float* %g4, align 4
-  %t5 = load float, float* %g5, align 4
-  %t6 = load float, float* %g6, align 4
-  %t7 = load float, float* %g7, align 4
+  %g1 = getelementptr inbounds float, ptr %p, i64 1
+  %g2 = getelementptr inbounds float, ptr %p, i64 2
+  %g3 = getelementptr inbounds float, ptr %p, i64 3
+  %g4 = getelementptr inbounds float, ptr %p, i64 4
+  %g5 = getelementptr inbounds float, ptr %p, i64 5
+  %g6 = getelementptr inbounds float, ptr %p, i64 6
+  %g7 = getelementptr inbounds float, ptr %p, i64 7
+  %t0 = load float, ptr %p, align 4
+  %t1 = load float, ptr %g1, align 4
+  %t2 = load float, ptr %g2, align 4
+  %t3 = load float, ptr %g3, align 4
+  %t4 = load float, ptr %g4, align 4
+  %t5 = load float, ptr %g5, align 4
+  %t6 = load float, ptr %g6, align 4
+  %t7 = load float, ptr %g7, align 4
   %m1 = tail call fast float @llvm.maxnum.f32(float %t1, float %t0)
   %m2 = tail call fast float @llvm.maxnum.f32(float %t2, float %m1)
   %m3 = tail call fast float @llvm.maxnum.f32(float %t3, float %m2)
@@ -439,35 +436,34 @@ define float @reduction_v8f32_fast(float* %p) {
   ret float %m7
 }
 
-define double @reduction_v2f64_fast(double* %p) {
+define double @reduction_v2f64_fast(ptr %p) {
 ; CHECK-LABEL: @reduction_v2f64_fast(
-; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 1
-; CHECK-NEXT:    [[T0:%.*]] = load double, double* [[P]], align 4
-; CHECK-NEXT:    [[T1:%.*]] = load double, double* [[G1]], align 4
+; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds double, ptr [[P:%.*]], i64 1
+; CHECK-NEXT:    [[T0:%.*]] = load double, ptr [[P]], align 4
+; CHECK-NEXT:    [[T1:%.*]] = load double, ptr [[G1]], align 4
 ; CHECK-NEXT:    [[M1:%.*]] = tail call fast double @llvm.maxnum.f64(double [[T1]], double [[T0]])
 ; CHECK-NEXT:    ret double [[M1]]
 ;
-  %g1 = getelementptr inbounds double, double* %p, i64 1
-  %t0 = load double, double* %p, align 4
-  %t1 = load double, double* %g1, align 4
+  %g1 = getelementptr inbounds double, ptr %p, i64 1
+  %t0 = load double, ptr %p, align 4
+  %t1 = load double, ptr %g1, align 4
   %m1 = tail call fast double @llvm.maxnum.f64(double %t1, double %t0)
   ret double %m1
 }
 
-define double @reduction_v4f64_fast(double* %p) {
+define double @reduction_v4f64_fast(ptr %p) {
 ; CHECK-LABEL: @reduction_v4f64_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[P:%.*]] to <4 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x double>, <4 x double>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> [[TMP2]])
 ; CHECK-NEXT:    ret double [[TMP3]]
 ;
-  %g1 = getelementptr inbounds double, double* %p, i64 1
-  %g2 = getelementptr inbounds double, double* %p, i64 2
-  %g3 = getelementptr inbounds double, double* %p, i64 3
-  %t0 = load double, double* %p, align 4
-  %t1 = load double, double* %g1, align 4
-  %t2 = load double, double* %g2, align 4
-  %t3 = load double, double* %g3, align 4
+  %g1 = getelementptr inbounds double, ptr %p, i64 1
+  %g2 = getelementptr inbounds double, ptr %p, i64 2
+  %g3 = getelementptr inbounds double, ptr %p, i64 3
+  %t0 = load double, ptr %p, align 4
+  %t1 = load double, ptr %g1, align 4
+  %t2 = load double, ptr %g2, align 4
+  %t3 = load double, ptr %g3, align 4
   %m1 = tail call fast double @llvm.maxnum.f64(double %t1, double %t0)
   %m2 = tail call fast double @llvm.maxnum.f64(double %t2, double %m1)
   %m3 = tail call fast double @llvm.maxnum.f64(double %t3, double %m2)
@@ -476,27 +472,27 @@ define double @reduction_v4f64_fast(double* %p) {
 
 ; Negative test - must have nnan.
 
-define double @reduction_v4f64_wrong_fmf(double* %p) {
+define double @reduction_v4f64_wrong_fmf(ptr %p) {
 ; CHECK-LABEL: @reduction_v4f64_wrong_fmf(
-; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 1
-; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds double, double* [[P]], i64 2
-; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds double, double* [[P]], i64 3
-; CHECK-NEXT:    [[T0:%.*]] = load double, double* [[P]], align 4
-; CHECK-NEXT:    [[T1:%.*]] = load double, double* [[G1]], align 4
-; CHECK-NEXT:    [[T2:%.*]] = load double, double* [[G2]], align 4
-; CHECK-NEXT:    [[T3:%.*]] = load double, double* [[G3]], align 4
+; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds double, ptr [[P:%.*]], i64 1
+; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds double, ptr [[P]], i64 2
+; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds double, ptr [[P]], i64 3
+; CHECK-NEXT:    [[T0:%.*]] = load double, ptr [[P]], align 4
+; CHECK-NEXT:    [[T1:%.*]] = load double, ptr [[G1]], align 4
+; CHECK-NEXT:    [[T2:%.*]] = load double, ptr [[G2]], align 4
+; CHECK-NEXT:    [[T3:%.*]] = load double, ptr [[G3]], align 4
 ; CHECK-NEXT:    [[M1:%.*]] = tail call ninf nsz double @llvm.maxnum.f64(double [[T1]], double [[T0]])
 ; CHECK-NEXT:    [[M2:%.*]] = tail call ninf nsz double @llvm.maxnum.f64(double [[T2]], double [[M1]])
 ; CHECK-NEXT:    [[M3:%.*]] = tail call ninf nsz double @llvm.maxnum.f64(double [[T3]], double [[M2]])
 ; CHECK-NEXT:    ret double [[M3]]
 ;
-  %g1 = getelementptr inbounds double, double* %p, i64 1
-  %g2 = getelementptr inbounds double, double* %p, i64 2
-  %g3 = getelementptr inbounds double, double* %p, i64 3
-  %t0 = load double, double* %p, align 4
-  %t1 = load double, double* %g1, align 4
-  %t2 = load double, double* %g2, align 4
-  %t3 = load double, double* %g3, align 4
+  %g1 = getelementptr inbounds double, ptr %p, i64 1
+  %g2 = getelementptr inbounds double, ptr %p, i64 2
+  %g3 = getelementptr inbounds double, ptr %p, i64 3
+  %t0 = load double, ptr %p, align 4
+  %t1 = load double, ptr %g1, align 4
+  %t2 = load double, ptr %g2, align 4
+  %t3 = load double, ptr %g3, align 4
   %m1 = tail call ninf nsz double @llvm.maxnum.f64(double %t1, double %t0)
   %m2 = tail call ninf nsz double @llvm.maxnum.f64(double %t2, double %m1)
   %m3 = tail call ninf nsz double @llvm.maxnum.f64(double %t3, double %m2)

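(The fminnum.ll changes below follow the same mechanical pattern as the fmaxnum.ll diff above: every typed pointer `T*` becomes the opaque `ptr`, `bitcast` constant expressions between pointer types disappear, and a `getelementptr` whose indices are all zero folds away so the access names the global directly. A minimal before/after sketch, using a hypothetical global `@g` purely for illustration:

  ; typed pointers (old form)
  %v0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @g, i32 0, i64 0), align 4
  %w0 = load <4 x float>, <4 x float>* bitcast ([16 x float]* @g to <4 x float>*), align 4

  ; opaque pointers (new form)
  %v1 = load float, ptr @g, align 4
  %w1 = load <4 x float>, ptr @g, align 4

With opaque pointers the load's result type alone determines how many bytes are read, so the pointee casts carry no information and are dropped; the vectorizer's FileCheck lines shrink accordingly because the separate bitcast instructions no longer exist.)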
diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll b/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll
index 7f17fa8d14b74..434fa13e880bb 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll
@@ -24,115 +24,115 @@ declare double @llvm.minnum.f64(double, double)
 
 define void @fminnum_2f64() #0 {
 ; CHECK-LABEL: @fminnum_2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcA64 to <2 x double>*), align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcB64 to <2 x double>*), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr @dst64, align 8
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 1), align 8
-  %b0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 0), align 8
-  %b1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 1), align 8
+  %a0 = load double, ptr @srcA64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8
+  %b0 = load double, ptr @srcB64, align 8
+  %b1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 8
   %fminnum0 = call double @llvm.minnum.f64(double %a0, double %b0)
   %fminnum1 = call double @llvm.minnum.f64(double %a1, double %b1)
-  store double %fminnum0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %fminnum1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %fminnum0, ptr @dst64, align 8
+  store double %fminnum1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @fminnum_4f64() #0 {
 ; SSE-LABEL: @fminnum_4f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcA64 to <2 x double>*), align 8
-; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcB64 to <2 x double>*), align 8
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 8
+; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 8
 ; SSE-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; SSE-NEXT:    store <2 x double> [[TMP3]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
-; SSE-NEXT:    [[TMP4:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 2) to <2 x double>*), align 8
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE-NEXT:    store <2 x double> [[TMP3]], ptr @dst64, align 8
+; SSE-NEXT:    [[TMP4:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP6:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]])
-; SSE-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @fminnum_4f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcA64 to <4 x double>*), align 8
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcB64 to <4 x double>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @srcA64, align 8
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr @srcB64, align 8
 ; AVX-NEXT:    [[TMP3:%.*]] = call <4 x double> @llvm.minnum.v4f64(<4 x double> [[TMP1]], <4 x double> [[TMP2]])
-; AVX-NEXT:    store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX-NEXT:    store <4 x double> [[TMP3]], ptr @dst64, align 8
 ; AVX-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 1), align 8
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 2), align 8
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 3), align 8
-  %b0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 0), align 8
-  %b1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 1), align 8
-  %b2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 2), align 8
-  %b3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 3), align 8
+  %a0 = load double, ptr @srcA64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 8
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 3), align 8
+  %b0 = load double, ptr @srcB64, align 8
+  %b1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 8
+  %b2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 8
+  %b3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 3), align 8
   %fminnum0 = call double @llvm.minnum.f64(double %a0, double %b0)
   %fminnum1 = call double @llvm.minnum.f64(double %a1, double %b1)
   %fminnum2 = call double @llvm.minnum.f64(double %a2, double %b2)
   %fminnum3 = call double @llvm.minnum.f64(double %a3, double %b3)
-  store double %fminnum0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %fminnum1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %fminnum2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-  store double %fminnum3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %fminnum0, ptr @dst64, align 8
+  store double %fminnum1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %fminnum2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+  store double %fminnum3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
   ret void
 }
 
 define void @fminnum_8f64() #0 {
 ; SSE-LABEL: @fminnum_8f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcA64 to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcB64 to <2 x double>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; SSE-NEXT:    store <2 x double> [[TMP3]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 2) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 2) to <2 x double>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP3]], ptr @dst64, align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]])
-; SSE-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 4) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 4) to <2 x double>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP8:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP9:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP7]], <2 x double> [[TMP8]])
-; SSE-NEXT:    store <2 x double> [[TMP9]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP10:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 6) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP11:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 6) to <2 x double>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP9]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP10:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 6), align 4
+; SSE-NEXT:    [[TMP11:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 6), align 4
 ; SSE-NEXT:    [[TMP12:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP10]], <2 x double> [[TMP11]])
-; SSE-NEXT:    store <2 x double> [[TMP12]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP12]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @fminnum_8f64(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcA64 to <4 x double>*), align 4
-; AVX256-NEXT:    [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcB64 to <4 x double>*), align 4
+; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @srcA64, align 4
+; AVX256-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr @srcB64, align 4
 ; AVX256-NEXT:    [[TMP3:%.*]] = call <4 x double> @llvm.minnum.v4f64(<4 x double> [[TMP1]], <4 x double> [[TMP2]])
-; AVX256-NEXT:    store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 4
-; AVX256-NEXT:    [[TMP4:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 4) to <4 x double>*), align 4
-; AVX256-NEXT:    [[TMP5:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 4) to <4 x double>*), align 4
+; AVX256-NEXT:    store <4 x double> [[TMP3]], ptr @dst64, align 4
+; AVX256-NEXT:    [[TMP4:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 4), align 4
+; AVX256-NEXT:    [[TMP5:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 4), align 4
 ; AVX256-NEXT:    [[TMP6:%.*]] = call <4 x double> @llvm.minnum.v4f64(<4 x double> [[TMP4]], <4 x double> [[TMP5]])
-; AVX256-NEXT:    store <4 x double> [[TMP6]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 4
+; AVX256-NEXT:    store <4 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @fminnum_8f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @srcA64 to <8 x double>*), align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @srcB64 to <8 x double>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @srcA64, align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x double>, ptr @srcB64, align 4
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <8 x double> @llvm.minnum.v8f64(<8 x double> [[TMP1]], <8 x double> [[TMP2]])
-; AVX512-NEXT:    store <8 x double> [[TMP3]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 4
+; AVX512-NEXT:    store <8 x double> [[TMP3]], ptr @dst64, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 4
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 1), align 4
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 2), align 4
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 3), align 4
-  %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 4), align 4
-  %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 5), align 4
-  %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 6), align 4
-  %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 7), align 4
-  %b0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 0), align 4
-  %b1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 1), align 4
-  %b2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 2), align 4
-  %b3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 3), align 4
-  %b4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 4), align 4
-  %b5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 5), align 4
-  %b6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 6), align 4
-  %b7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 7), align 4
+  %a0 = load double, ptr @srcA64, align 4
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 4
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 4
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 3), align 4
+  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 4), align 4
+  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 5), align 4
+  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 6), align 4
+  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 7), align 4
+  %b0 = load double, ptr @srcB64, align 4
+  %b1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 4
+  %b2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 4
+  %b3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 3), align 4
+  %b4 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 4), align 4
+  %b5 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 5), align 4
+  %b6 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 6), align 4
+  %b7 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 7), align 4
   %fminnum0 = call double @llvm.minnum.f64(double %a0, double %b0)
   %fminnum1 = call double @llvm.minnum.f64(double %a1, double %b1)
   %fminnum2 = call double @llvm.minnum.f64(double %a2, double %b2)
@@ -141,79 +141,79 @@ define void @fminnum_8f64() #0 {
   %fminnum5 = call double @llvm.minnum.f64(double %a5, double %b5)
   %fminnum6 = call double @llvm.minnum.f64(double %a6, double %b6)
   %fminnum7 = call double @llvm.minnum.f64(double %a7, double %b7)
-  store double %fminnum0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 4
-  store double %fminnum1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 4
-  store double %fminnum2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 4
-  store double %fminnum3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 4
-  store double %fminnum4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 4
-  store double %fminnum5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 4
-  store double %fminnum6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 4
-  store double %fminnum7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 4
+  store double %fminnum0, ptr @dst64, align 4
+  store double %fminnum1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 4
+  store double %fminnum2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 4
+  store double %fminnum3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 4
+  store double %fminnum4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
+  store double %fminnum5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 4
+  store double %fminnum6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 4
+  store double %fminnum7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 4
   ret void
 }
 
 define void @fminnum_4f32() #0 {
 ; CHECK-LABEL: @fminnum_4f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcA32 to <4 x float>*), align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcB32 to <4 x float>*), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @srcA32, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @srcB32, align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT:    store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr @dst32, align 4
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 3), align 4
-  %b0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 0), align 4
-  %b1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 1), align 4
-  %b2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 2), align 4
-  %b3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 3), align 4
+  %a0 = load float, ptr @srcA32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 3), align 4
+  %b0 = load float, ptr @srcB32, align 4
+  %b1 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 1), align 4
+  %b2 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 2), align 4
+  %b3 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 3), align 4
   %fminnum0 = call float @llvm.minnum.f32(float %a0, float %b0)
   %fminnum1 = call float @llvm.minnum.f32(float %a1, float %b1)
   %fminnum2 = call float @llvm.minnum.f32(float %a2, float %b2)
   %fminnum3 = call float @llvm.minnum.f32(float %a3, float %b3)
-  store float %fminnum0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-  store float %fminnum1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %fminnum2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-  store float %fminnum3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %fminnum0, ptr @dst32, align 4
+  store float %fminnum1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %fminnum2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+  store float %fminnum3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @fminnum_8f32() #0 {
 ; SSE-LABEL: @fminnum_8f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcA32 to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcB32 to <4 x float>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @srcA32, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @srcB32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; SSE-NEXT:    store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 4) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP3]], ptr @dst32, align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]])
-; SSE-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @fminnum_8f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcA32 to <8 x float>*), align 4
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcB32 to <8 x float>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @srcA32, align 4
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x float>, ptr @srcB32, align 4
 ; AVX-NEXT:    [[TMP3:%.*]] = call <8 x float> @llvm.minnum.v8f32(<8 x float> [[TMP1]], <8 x float> [[TMP2]])
-; AVX-NEXT:    store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX-NEXT:    store <8 x float> [[TMP3]], ptr @dst32, align 4
 ; AVX-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 3), align 4
-  %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 4), align 4
-  %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 5), align 4
-  %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 6), align 4
-  %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 7), align 4
-  %b0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 0), align 4
-  %b1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 1), align 4
-  %b2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 2), align 4
-  %b3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 3), align 4
-  %b4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 4), align 4
-  %b5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 5), align 4
-  %b6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 6), align 4
-  %b7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 7), align 4
+  %a0 = load float, ptr @srcA32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 3), align 4
+  %a4 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 4), align 4
+  %a5 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 5), align 4
+  %a6 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 6), align 4
+  %a7 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 7), align 4
+  %b0 = load float, ptr @srcB32, align 4
+  %b1 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 1), align 4
+  %b2 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 2), align 4
+  %b3 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 3), align 4
+  %b4 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 4), align 4
+  %b5 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 5), align 4
+  %b6 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 6), align 4
+  %b7 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 7), align 4
   %fminnum0 = call float @llvm.minnum.f32(float %a0, float %b0)
   %fminnum1 = call float @llvm.minnum.f32(float %a1, float %b1)
   %fminnum2 = call float @llvm.minnum.f32(float %a2, float %b2)
@@ -222,87 +222,87 @@ define void @fminnum_8f32() #0 {
   %fminnum5 = call float @llvm.minnum.f32(float %a5, float %b5)
   %fminnum6 = call float @llvm.minnum.f32(float %a6, float %b6)
   %fminnum7 = call float @llvm.minnum.f32(float %a7, float %b7)
-  store float %fminnum0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-  store float %fminnum1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %fminnum2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-  store float %fminnum3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-  store float %fminnum4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
-  store float %fminnum5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-  store float %fminnum6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
-  store float %fminnum7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  store float %fminnum0, ptr @dst32, align 4
+  store float %fminnum1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %fminnum2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+  store float %fminnum3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+  store float %fminnum4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+  store float %fminnum5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+  store float %fminnum6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 4
+  store float %fminnum7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @fminnum_16f32() #0 {
 ; SSE-LABEL: @fminnum_16f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcA32 to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcB32 to <4 x float>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @srcA32, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @srcB32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; SSE-NEXT:    store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 4) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP3]], ptr @dst32, align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]])
-; SSE-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 8) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 8) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP8:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP9:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP7]], <4 x float> [[TMP8]])
-; SSE-NEXT:    store <4 x float> [[TMP9]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP10:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 12) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP11:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP9]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP10:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 12), align 4
+; SSE-NEXT:    [[TMP11:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP12:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP10]], <4 x float> [[TMP11]])
-; SSE-NEXT:    store <4 x float> [[TMP12]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP12]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @fminnum_16f32(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcA32 to <8 x float>*), align 4
-; AVX256-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcB32 to <8 x float>*), align 4
+; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @srcA32, align 4
+; AVX256-NEXT:    [[TMP2:%.*]] = load <8 x float>, ptr @srcB32, align 4
 ; AVX256-NEXT:    [[TMP3:%.*]] = call <8 x float> @llvm.minnum.v8f32(<8 x float> [[TMP1]], <8 x float> [[TMP2]])
-; AVX256-NEXT:    store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
-; AVX256-NEXT:    [[TMP4:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 8) to <8 x float>*), align 4
-; AVX256-NEXT:    [[TMP5:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX256-NEXT:    store <8 x float> [[TMP3]], ptr @dst32, align 4
+; AVX256-NEXT:    [[TMP4:%.*]] = load <8 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 8), align 4
+; AVX256-NEXT:    [[TMP5:%.*]] = load <8 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 8), align 4
 ; AVX256-NEXT:    [[TMP6:%.*]] = call <8 x float> @llvm.minnum.v8f32(<8 x float> [[TMP4]], <8 x float> [[TMP5]])
-; AVX256-NEXT:    store <8 x float> [[TMP6]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX256-NEXT:    store <8 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @fminnum_16f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @srcA32 to <16 x float>*), align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @srcB32 to <16 x float>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, ptr @srcA32, align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x float>, ptr @srcB32, align 4
 ; AVX512-NEXT:    [[TMP3:%.*]] = call <16 x float> @llvm.minnum.v16f32(<16 x float> [[TMP1]], <16 x float> [[TMP2]])
-; AVX512-NEXT:    store <16 x float> [[TMP3]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 4
+; AVX512-NEXT:    store <16 x float> [[TMP3]], ptr @dst32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  0), align 4
-  %a1  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  1), align 4
-  %a2  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  2), align 4
-  %a3  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  3), align 4
-  %a4  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  4), align 4
-  %a5  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  5), align 4
-  %a6  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  6), align 4
-  %a7  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  7), align 4
-  %a8  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  8), align 4
-  %a9  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  9), align 4
-  %a10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 10), align 4
-  %a11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 11), align 4
-  %a12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 12), align 4
-  %a13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 13), align 4
-  %a14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 14), align 4
-  %a15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 15), align 4
-  %b0  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  0), align 4
-  %b1  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  1), align 4
-  %b2  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  2), align 4
-  %b3  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  3), align 4
-  %b4  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  4), align 4
-  %b5  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  5), align 4
-  %b6  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  6), align 4
-  %b7  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  7), align 4
-  %b8  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  8), align 4
-  %b9  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  9), align 4
-  %b10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 10), align 4
-  %b11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 11), align 4
-  %b12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 12), align 4
-  %b13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 13), align 4
-  %b14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 14), align 4
-  %b15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 15), align 4
+  %a0  = load float, ptr @srcA32, align 4
+  %a1  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  1), align 4
+  %a2  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  2), align 4
+  %a3  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  3), align 4
+  %a4  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  4), align 4
+  %a5  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  5), align 4
+  %a6  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  6), align 4
+  %a7  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  7), align 4
+  %a8  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  8), align 4
+  %a9  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  9), align 4
+  %a10 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 10), align 4
+  %a11 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 11), align 4
+  %a12 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 12), align 4
+  %a13 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 13), align 4
+  %a14 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 14), align 4
+  %a15 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 15), align 4
+  %b0  = load float, ptr @srcB32, align 4
+  %b1  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  1), align 4
+  %b2  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  2), align 4
+  %b3  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  3), align 4
+  %b4  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  4), align 4
+  %b5  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  5), align 4
+  %b6  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  6), align 4
+  %b7  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  7), align 4
+  %b8  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  8), align 4
+  %b9  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  9), align 4
+  %b10 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 10), align 4
+  %b11 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 11), align 4
+  %b12 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 12), align 4
+  %b13 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 13), align 4
+  %b14 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 14), align 4
+  %b15 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 15), align 4
   %fminnum0  = call float @llvm.minnum.f32(float %a0 , float %b0 )
   %fminnum1  = call float @llvm.minnum.f32(float %a1 , float %b1 )
   %fminnum2  = call float @llvm.minnum.f32(float %a2 , float %b2 )
@@ -319,59 +319,57 @@ define void @fminnum_16f32() #0 {
   %fminnum13 = call float @llvm.minnum.f32(float %a13, float %b13)
   %fminnum14 = call float @llvm.minnum.f32(float %a14, float %b14)
   %fminnum15 = call float @llvm.minnum.f32(float %a15, float %b15)
-  store float %fminnum0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  0), align 4
-  store float %fminnum1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  1), align 4
-  store float %fminnum2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  2), align 4
-  store float %fminnum3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  3), align 4
-  store float %fminnum4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  4), align 4
-  store float %fminnum5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  5), align 4
-  store float %fminnum6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  6), align 4
-  store float %fminnum7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  7), align 4
-  store float %fminnum8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  8), align 4
-  store float %fminnum9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  9), align 4
-  store float %fminnum10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
-  store float %fminnum11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-  store float %fminnum12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
-  store float %fminnum13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-  store float %fminnum14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
-  store float %fminnum15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+  store float %fminnum0 , ptr @dst32, align 4
+  store float %fminnum1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  1), align 4
+  store float %fminnum2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  2), align 4
+  store float %fminnum3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  3), align 4
+  store float %fminnum4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  4), align 4
+  store float %fminnum5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  5), align 4
+  store float %fminnum6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  6), align 4
+  store float %fminnum7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  7), align 4
+  store float %fminnum8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  8), align 4
+  store float %fminnum9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  9), align 4
+  store float %fminnum10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 4
+  store float %fminnum11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+  store float %fminnum12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
+  store float %fminnum13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+  store float %fminnum14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 4
+  store float %fminnum15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
   ret void
 }
 
-define float @reduction_v4f32_fast(float* %p) {
+define float @reduction_v4f32_fast(ptr %p) {
 ; CHECK-LABEL: @reduction_v4f32_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP2]])
 ; CHECK-NEXT:    ret float [[TMP3]]
 ;
-  %g1 = getelementptr inbounds float, float* %p, i64 1
-  %g2 = getelementptr inbounds float, float* %p, i64 2
-  %g3 = getelementptr inbounds float, float* %p, i64 3
-  %t0 = load float, float* %p, align 4
-  %t1 = load float, float* %g1, align 4
-  %t2 = load float, float* %g2, align 4
-  %t3 = load float, float* %g3, align 4
+  %g1 = getelementptr inbounds float, ptr %p, i64 1
+  %g2 = getelementptr inbounds float, ptr %p, i64 2
+  %g3 = getelementptr inbounds float, ptr %p, i64 3
+  %t0 = load float, ptr %p, align 4
+  %t1 = load float, ptr %g1, align 4
+  %t2 = load float, ptr %g2, align 4
+  %t3 = load float, ptr %g3, align 4
   %m1 = tail call fast float @llvm.minnum.f32(float %t1, float %t0)
   %m2 = tail call fast float @llvm.minnum.f32(float %t2, float %m1)
   %m3 = tail call fast float @llvm.minnum.f32(float %t3, float %m2)
   ret float %m3
 }
 
-define float @reduction_v4f32_nnan(float* %p) {
+define float @reduction_v4f32_nnan(ptr %p) {
 ; CHECK-LABEL: @reduction_v4f32_nnan(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP2]])
 ; CHECK-NEXT:    ret float [[TMP3]]
 ;
-  %g1 = getelementptr inbounds float, float* %p, i64 1
-  %g2 = getelementptr inbounds float, float* %p, i64 2
-  %g3 = getelementptr inbounds float, float* %p, i64 3
-  %t0 = load float, float* %p, align 4
-  %t1 = load float, float* %g1, align 4
-  %t2 = load float, float* %g2, align 4
-  %t3 = load float, float* %g3, align 4
+  %g1 = getelementptr inbounds float, ptr %p, i64 1
+  %g2 = getelementptr inbounds float, ptr %p, i64 2
+  %g3 = getelementptr inbounds float, ptr %p, i64 3
+  %t0 = load float, ptr %p, align 4
+  %t1 = load float, ptr %g1, align 4
+  %t2 = load float, ptr %g2, align 4
+  %t3 = load float, ptr %g3, align 4
   %m1 = tail call nnan float @llvm.minnum.f32(float %t1, float %t0)
   %m2 = tail call nnan float @llvm.minnum.f32(float %t2, float %m1)
   %m3 = tail call nnan float @llvm.minnum.f32(float %t3, float %m2)
@@ -380,55 +378,54 @@ define float @reduction_v4f32_nnan(float* %p) {
 
 ; Negative test - must have nnan.
 
-define float @reduction_v4f32_wrong_fmf(float* %p) {
+define float @reduction_v4f32_wrong_fmf(ptr %p) {
 ; CHECK-LABEL: @reduction_v4f32_wrong_fmf(
-; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
-; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
-; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
-; CHECK-NEXT:    [[T0:%.*]] = load float, float* [[P]], align 4
-; CHECK-NEXT:    [[T1:%.*]] = load float, float* [[G1]], align 4
-; CHECK-NEXT:    [[T2:%.*]] = load float, float* [[G2]], align 4
-; CHECK-NEXT:    [[T3:%.*]] = load float, float* [[G3]], align 4
+; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds float, ptr [[P:%.*]], i64 1
+; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 2
+; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds float, ptr [[P]], i64 3
+; CHECK-NEXT:    [[T0:%.*]] = load float, ptr [[P]], align 4
+; CHECK-NEXT:    [[T1:%.*]] = load float, ptr [[G1]], align 4
+; CHECK-NEXT:    [[T2:%.*]] = load float, ptr [[G2]], align 4
+; CHECK-NEXT:    [[T3:%.*]] = load float, ptr [[G3]], align 4
 ; CHECK-NEXT:    [[M1:%.*]] = tail call reassoc nsz float @llvm.minnum.f32(float [[T1]], float [[T0]])
 ; CHECK-NEXT:    [[M2:%.*]] = tail call reassoc nsz float @llvm.minnum.f32(float [[T2]], float [[M1]])
 ; CHECK-NEXT:    [[M3:%.*]] = tail call reassoc nsz float @llvm.minnum.f32(float [[T3]], float [[M2]])
 ; CHECK-NEXT:    ret float [[M3]]
 ;
-  %g1 = getelementptr inbounds float, float* %p, i64 1
-  %g2 = getelementptr inbounds float, float* %p, i64 2
-  %g3 = getelementptr inbounds float, float* %p, i64 3
-  %t0 = load float, float* %p, align 4
-  %t1 = load float, float* %g1, align 4
-  %t2 = load float, float* %g2, align 4
-  %t3 = load float, float* %g3, align 4
+  %g1 = getelementptr inbounds float, ptr %p, i64 1
+  %g2 = getelementptr inbounds float, ptr %p, i64 2
+  %g3 = getelementptr inbounds float, ptr %p, i64 3
+  %t0 = load float, ptr %p, align 4
+  %t1 = load float, ptr %g1, align 4
+  %t2 = load float, ptr %g2, align 4
+  %t3 = load float, ptr %g3, align 4
   %m1 = tail call reassoc nsz float @llvm.minnum.f32(float %t1, float %t0)
   %m2 = tail call reassoc nsz float @llvm.minnum.f32(float %t2, float %m1)
   %m3 = tail call reassoc nsz float @llvm.minnum.f32(float %t3, float %m2)
   ret float %m3
 }
 
-define float @reduction_v8f32_fast(float* %p) {
+define float @reduction_v8f32_fast(ptr %p) {
 ; CHECK-LABEL: @reduction_v8f32_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <8 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x float>, ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> [[TMP2]])
 ; CHECK-NEXT:    ret float [[TMP3]]
 ;
-  %g1 = getelementptr inbounds float, float* %p, i64 1
-  %g2 = getelementptr inbounds float, float* %p, i64 2
-  %g3 = getelementptr inbounds float, float* %p, i64 3
-  %g4 = getelementptr inbounds float, float* %p, i64 4
-  %g5 = getelementptr inbounds float, float* %p, i64 5
-  %g6 = getelementptr inbounds float, float* %p, i64 6
-  %g7 = getelementptr inbounds float, float* %p, i64 7
-  %t0 = load float, float* %p, align 4
-  %t1 = load float, float* %g1, align 4
-  %t2 = load float, float* %g2, align 4
-  %t3 = load float, float* %g3, align 4
-  %t4 = load float, float* %g4, align 4
-  %t5 = load float, float* %g5, align 4
-  %t6 = load float, float* %g6, align 4
-  %t7 = load float, float* %g7, align 4
+  %g1 = getelementptr inbounds float, ptr %p, i64 1
+  %g2 = getelementptr inbounds float, ptr %p, i64 2
+  %g3 = getelementptr inbounds float, ptr %p, i64 3
+  %g4 = getelementptr inbounds float, ptr %p, i64 4
+  %g5 = getelementptr inbounds float, ptr %p, i64 5
+  %g6 = getelementptr inbounds float, ptr %p, i64 6
+  %g7 = getelementptr inbounds float, ptr %p, i64 7
+  %t0 = load float, ptr %p, align 4
+  %t1 = load float, ptr %g1, align 4
+  %t2 = load float, ptr %g2, align 4
+  %t3 = load float, ptr %g3, align 4
+  %t4 = load float, ptr %g4, align 4
+  %t5 = load float, ptr %g5, align 4
+  %t6 = load float, ptr %g6, align 4
+  %t7 = load float, ptr %g7, align 4
   %m1 = tail call fast float @llvm.minnum.f32(float %t1, float %t0)
   %m2 = tail call fast float @llvm.minnum.f32(float %t2, float %m1)
   %m3 = tail call fast float @llvm.minnum.f32(float %t3, float %m2)
@@ -439,35 +436,34 @@ define float @reduction_v8f32_fast(float* %p) {
   ret float %m7
 }
 
-define double @reduction_v2f64_fast(double* %p) {
+define double @reduction_v2f64_fast(ptr %p) {
 ; CHECK-LABEL: @reduction_v2f64_fast(
-; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 1
-; CHECK-NEXT:    [[T0:%.*]] = load double, double* [[P]], align 4
-; CHECK-NEXT:    [[T1:%.*]] = load double, double* [[G1]], align 4
+; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds double, ptr [[P:%.*]], i64 1
+; CHECK-NEXT:    [[T0:%.*]] = load double, ptr [[P]], align 4
+; CHECK-NEXT:    [[T1:%.*]] = load double, ptr [[G1]], align 4
 ; CHECK-NEXT:    [[M1:%.*]] = tail call fast double @llvm.minnum.f64(double [[T1]], double [[T0]])
 ; CHECK-NEXT:    ret double [[M1]]
 ;
-  %g1 = getelementptr inbounds double, double* %p, i64 1
-  %t0 = load double, double* %p, align 4
-  %t1 = load double, double* %g1, align 4
+  %g1 = getelementptr inbounds double, ptr %p, i64 1
+  %t0 = load double, ptr %p, align 4
+  %t1 = load double, ptr %g1, align 4
   %m1 = tail call fast double @llvm.minnum.f64(double %t1, double %t0)
   ret double %m1
 }
 
-define double @reduction_v4f64_fast(double* %p) {
+define double @reduction_v4f64_fast(ptr %p) {
 ; CHECK-LABEL: @reduction_v4f64_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[P:%.*]] to <4 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x double>, <4 x double>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> [[TMP2]])
 ; CHECK-NEXT:    ret double [[TMP3]]
 ;
-  %g1 = getelementptr inbounds double, double* %p, i64 1
-  %g2 = getelementptr inbounds double, double* %p, i64 2
-  %g3 = getelementptr inbounds double, double* %p, i64 3
-  %t0 = load double, double* %p, align 4
-  %t1 = load double, double* %g1, align 4
-  %t2 = load double, double* %g2, align 4
-  %t3 = load double, double* %g3, align 4
+  %g1 = getelementptr inbounds double, ptr %p, i64 1
+  %g2 = getelementptr inbounds double, ptr %p, i64 2
+  %g3 = getelementptr inbounds double, ptr %p, i64 3
+  %t0 = load double, ptr %p, align 4
+  %t1 = load double, ptr %g1, align 4
+  %t2 = load double, ptr %g2, align 4
+  %t3 = load double, ptr %g3, align 4
   %m1 = tail call fast double @llvm.minnum.f64(double %t1, double %t0)
   %m2 = tail call fast double @llvm.minnum.f64(double %t2, double %m1)
   %m3 = tail call fast double @llvm.minnum.f64(double %t3, double %m2)
@@ -476,27 +472,27 @@ define double @reduction_v4f64_fast(double* %p) {
 
 ; Negative test - must have nnan.
 
-define double @reduction_v4f64_not_fast(double* %p) {
+define double @reduction_v4f64_not_fast(ptr %p) {
 ; CHECK-LABEL: @reduction_v4f64_not_fast(
-; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 1
-; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds double, double* [[P]], i64 2
-; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds double, double* [[P]], i64 3
-; CHECK-NEXT:    [[T0:%.*]] = load double, double* [[P]], align 4
-; CHECK-NEXT:    [[T1:%.*]] = load double, double* [[G1]], align 4
-; CHECK-NEXT:    [[T2:%.*]] = load double, double* [[G2]], align 4
-; CHECK-NEXT:    [[T3:%.*]] = load double, double* [[G3]], align 4
+; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds double, ptr [[P:%.*]], i64 1
+; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds double, ptr [[P]], i64 2
+; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds double, ptr [[P]], i64 3
+; CHECK-NEXT:    [[T0:%.*]] = load double, ptr [[P]], align 4
+; CHECK-NEXT:    [[T1:%.*]] = load double, ptr [[G1]], align 4
+; CHECK-NEXT:    [[T2:%.*]] = load double, ptr [[G2]], align 4
+; CHECK-NEXT:    [[T3:%.*]] = load double, ptr [[G3]], align 4
 ; CHECK-NEXT:    [[M1:%.*]] = tail call double @llvm.minnum.f64(double [[T1]], double [[T0]])
 ; CHECK-NEXT:    [[M2:%.*]] = tail call double @llvm.minnum.f64(double [[T2]], double [[M1]])
 ; CHECK-NEXT:    [[M3:%.*]] = tail call double @llvm.minnum.f64(double [[T3]], double [[M2]])
 ; CHECK-NEXT:    ret double [[M3]]
 ;
-  %g1 = getelementptr inbounds double, double* %p, i64 1
-  %g2 = getelementptr inbounds double, double* %p, i64 2
-  %g3 = getelementptr inbounds double, double* %p, i64 3
-  %t0 = load double, double* %p, align 4
-  %t1 = load double, double* %g1, align 4
-  %t2 = load double, double* %g2, align 4
-  %t3 = load double, double* %g3, align 4
+  %g1 = getelementptr inbounds double, ptr %p, i64 1
+  %g2 = getelementptr inbounds double, ptr %p, i64 2
+  %g3 = getelementptr inbounds double, ptr %p, i64 3
+  %t0 = load double, ptr %p, align 4
+  %t1 = load double, ptr %g1, align 4
+  %t2 = load double, ptr %g2, align 4
+  %t3 = load double, ptr %g3, align 4
   %m1 = tail call double @llvm.minnum.f64(double %t1, double %t0)
   %m2 = tail call double @llvm.minnum.f64(double %t2, double %m1)
   %m3 = tail call double @llvm.minnum.f64(double %t3, double %m2)

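(Illustrative note, not part of the commit: every hunk above applies the same mechanical rewrite. A typed pointer such as float* or <4 x float>* becomes the opaque ptr, the constant bitcast expressions that only changed the pointee type are dropped, and a getelementptr whose indices are all zero collapses to the bare global. A minimal before/after sketch of that pattern, using a hypothetical @src global rather than one of the test globals:

  ; With typed pointers, loading a <4 x float> from a [16 x float] global
  ; required a constant bitcast of the array pointer to a vector pointer:
  %v.typed = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src to <4 x float>*), align 4

  ; With opaque pointers there is no pointee type to convert, so the
  ; bitcast disappears and the global is used directly:
  %v.opaque = load <4 x float>, ptr @src, align 4

Non-zero offsets keep their getelementptr, just spelled with ptr in place of the pointee type, as in the i32 0, i64 4 accesses in the hunks above.)
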
diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/fmuladd.ll b/llvm/test/Transforms/SLPVectorizer/X86/fmuladd.ll
index f413164d4b9d7..28e837c2d7a4e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/fmuladd.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/fmuladd.ll
@@ -26,140 +26,140 @@ declare double @llvm.fmuladd.f64(double, double, double)
 
 define void @fmuladd_2f64() #0 {
 ; CHECK-LABEL: @fmuladd_2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcA64 to <2 x double>*), align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcB64 to <2 x double>*), align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcC64 to <2 x double>*), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr @srcC64, align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x double> [[TMP3]])
-; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; CHECK-NEXT:    store <2 x double> [[TMP4]], ptr @dst64, align 8
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 1), align 8
-  %b0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 0), align 8
-  %b1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 1), align 8
-  %c0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 0), align 8
-  %c1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 1), align 8
+  %a0 = load double, ptr @srcA64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8
+  %b0 = load double, ptr @srcB64, align 8
+  %b1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 8
+  %c0 = load double, ptr @srcC64, align 8
+  %c1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 1), align 8
   %fmuladd0 = call double @llvm.fmuladd.f64(double %a0, double %b0, double %c0)
   %fmuladd1 = call double @llvm.fmuladd.f64(double %a1, double %b1, double %c1)
-  store double %fmuladd0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %fmuladd1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %fmuladd0, ptr @dst64, align 8
+  store double %fmuladd1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @fmuladd_4f64() #0 {
 ; SSE-LABEL: @fmuladd_4f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcA64 to <2 x double>*), align 8
-; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcB64 to <2 x double>*), align 8
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcC64 to <2 x double>*), align 8
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 8
+; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 8
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr @srcC64, align 8
 ; SSE-NEXT:    [[TMP4:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x double> [[TMP3]])
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 2) to <2 x double>*), align 8
-; SSE-NEXT:    [[TMP6:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 2) to <2 x double>*), align 8
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr @dst64, align 8
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP6:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP8:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP5]], <2 x double> [[TMP6]], <2 x double> [[TMP7]])
-; SSE-NEXT:    store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @fmuladd_4f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcA64 to <4 x double>*), align 8
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcB64 to <4 x double>*), align 8
-; AVX-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcC64 to <4 x double>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @srcA64, align 8
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr @srcB64, align 8
+; AVX-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr @srcC64, align 8
 ; AVX-NEXT:    [[TMP4:%.*]] = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> [[TMP1]], <4 x double> [[TMP2]], <4 x double> [[TMP3]])
-; AVX-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX-NEXT:    store <4 x double> [[TMP4]], ptr @dst64, align 8
 ; AVX-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 1), align 8
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 2), align 8
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 3), align 8
-  %b0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 0), align 8
-  %b1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 1), align 8
-  %b2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 2), align 8
-  %b3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 3), align 8
-  %c0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 0), align 8
-  %c1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 1), align 8
-  %c2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 2), align 8
-  %c3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 3), align 8
+  %a0 = load double, ptr @srcA64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 8
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 3), align 8
+  %b0 = load double, ptr @srcB64, align 8
+  %b1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 8
+  %b2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 8
+  %b3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 3), align 8
+  %c0 = load double, ptr @srcC64, align 8
+  %c1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 1), align 8
+  %c2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 2), align 8
+  %c3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 3), align 8
   %fmuladd0 = call double @llvm.fmuladd.f64(double %a0, double %b0, double %c0)
   %fmuladd1 = call double @llvm.fmuladd.f64(double %a1, double %b1, double %c1)
   %fmuladd2 = call double @llvm.fmuladd.f64(double %a2, double %b2, double %c2)
   %fmuladd3 = call double @llvm.fmuladd.f64(double %a3, double %b3, double %c3)
-  store double %fmuladd0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %fmuladd1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %fmuladd2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-  store double %fmuladd3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %fmuladd0, ptr @dst64, align 8
+  store double %fmuladd1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %fmuladd2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+  store double %fmuladd3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
   ret void
 }
 
 define void @fmuladd_8f64() #0 {
 ; SSE-LABEL: @fmuladd_8f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcA64 to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcB64 to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcC64 to <2 x double>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 4
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr @srcC64, align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x double> [[TMP3]])
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 2) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP6:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 2) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 2) to <2 x double>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr @dst64, align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 4
+; SSE-NEXT:    [[TMP6:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 2), align 4
 ; SSE-NEXT:    [[TMP8:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP5]], <2 x double> [[TMP6]], <2 x double> [[TMP7]])
-; SSE-NEXT:    store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP9:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 4) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP10:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 4) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP11:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 4) to <2 x double>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 4
+; SSE-NEXT:    [[TMP9:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP10:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP11:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP12:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]])
-; SSE-NEXT:    store <2 x double> [[TMP12]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP13:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 6) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP14:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 6) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP15:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 6) to <2 x double>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP12]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP13:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 6), align 4
+; SSE-NEXT:    [[TMP14:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 6), align 4
+; SSE-NEXT:    [[TMP15:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 6), align 4
 ; SSE-NEXT:    [[TMP16:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP13]], <2 x double> [[TMP14]], <2 x double> [[TMP15]])
-; SSE-NEXT:    store <2 x double> [[TMP16]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP16]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @fmuladd_8f64(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcA64 to <4 x double>*), align 4
-; AVX256-NEXT:    [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcB64 to <4 x double>*), align 4
-; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcC64 to <4 x double>*), align 4
+; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @srcA64, align 4
+; AVX256-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr @srcB64, align 4
+; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr @srcC64, align 4
 ; AVX256-NEXT:    [[TMP4:%.*]] = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> [[TMP1]], <4 x double> [[TMP2]], <4 x double> [[TMP3]])
-; AVX256-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 4
-; AVX256-NEXT:    [[TMP5:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 4) to <4 x double>*), align 4
-; AVX256-NEXT:    [[TMP6:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 4) to <4 x double>*), align 4
-; AVX256-NEXT:    [[TMP7:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 4) to <4 x double>*), align 4
+; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr @dst64, align 4
+; AVX256-NEXT:    [[TMP5:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 4), align 4
+; AVX256-NEXT:    [[TMP6:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 4), align 4
+; AVX256-NEXT:    [[TMP7:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 4), align 4
 ; AVX256-NEXT:    [[TMP8:%.*]] = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x double> [[TMP7]])
-; AVX256-NEXT:    store <4 x double> [[TMP8]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 4
+; AVX256-NEXT:    store <4 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @fmuladd_8f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @srcA64 to <8 x double>*), align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @srcB64 to <8 x double>*), align 4
-; AVX512-NEXT:    [[TMP3:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @srcC64 to <8 x double>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @srcA64, align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x double>, ptr @srcB64, align 4
+; AVX512-NEXT:    [[TMP3:%.*]] = load <8 x double>, ptr @srcC64, align 4
 ; AVX512-NEXT:    [[TMP4:%.*]] = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> [[TMP1]], <8 x double> [[TMP2]], <8 x double> [[TMP3]])
-; AVX512-NEXT:    store <8 x double> [[TMP4]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 4
+; AVX512-NEXT:    store <8 x double> [[TMP4]], ptr @dst64, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 4
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 1), align 4
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 2), align 4
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 3), align 4
-  %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 4), align 4
-  %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 5), align 4
-  %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 6), align 4
-  %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 7), align 4
-  %b0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 0), align 4
-  %b1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 1), align 4
-  %b2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 2), align 4
-  %b3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 3), align 4
-  %b4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 4), align 4
-  %b5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 5), align 4
-  %b6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 6), align 4
-  %b7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 7), align 4
-  %c0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 0), align 4
-  %c1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 1), align 4
-  %c2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 2), align 4
-  %c3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 3), align 4
-  %c4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 4), align 4
-  %c5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 5), align 4
-  %c6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 6), align 4
-  %c7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 7), align 4
+  %a0 = load double, ptr @srcA64, align 4
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 4
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 2), align 4
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 3), align 4
+  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 4), align 4
+  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 5), align 4
+  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 6), align 4
+  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 7), align 4
+  %b0 = load double, ptr @srcB64, align 4
+  %b1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 4
+  %b2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 2), align 4
+  %b3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 3), align 4
+  %b4 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 4), align 4
+  %b5 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 5), align 4
+  %b6 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 6), align 4
+  %b7 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 7), align 4
+  %c0 = load double, ptr @srcC64, align 4
+  %c1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 1), align 4
+  %c2 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 2), align 4
+  %c3 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 3), align 4
+  %c4 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 4), align 4
+  %c5 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 5), align 4
+  %c6 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 6), align 4
+  %c7 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 7), align 4
   %fmuladd0 = call double @llvm.fmuladd.f64(double %a0, double %b0, double %c0)
   %fmuladd1 = call double @llvm.fmuladd.f64(double %a1, double %b1, double %c1)
   %fmuladd2 = call double @llvm.fmuladd.f64(double %a2, double %b2, double %c2)
@@ -168,95 +168,95 @@ define void @fmuladd_8f64() #0 {
   %fmuladd5 = call double @llvm.fmuladd.f64(double %a5, double %b5, double %c5)
   %fmuladd6 = call double @llvm.fmuladd.f64(double %a6, double %b6, double %c6)
   %fmuladd7 = call double @llvm.fmuladd.f64(double %a7, double %b7, double %c7)
-  store double %fmuladd0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 4
-  store double %fmuladd1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 4
-  store double %fmuladd2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 4
-  store double %fmuladd3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 4
-  store double %fmuladd4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 4
-  store double %fmuladd5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 4
-  store double %fmuladd6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 4
-  store double %fmuladd7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 4
+  store double %fmuladd0, ptr @dst64, align 4
+  store double %fmuladd1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 4
+  store double %fmuladd2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 4
+  store double %fmuladd3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 4
+  store double %fmuladd4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
+  store double %fmuladd5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 4
+  store double %fmuladd6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 4
+  store double %fmuladd7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 4
   ret void
 }
 
 define void @fmuladd_4f32() #0 {
 ; CHECK-LABEL: @fmuladd_4f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcA32 to <4 x float>*), align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcB32 to <4 x float>*), align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcC32 to <4 x float>*), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @srcA32, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @srcB32, align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr @srcC32, align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]])
-; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; CHECK-NEXT:    store <4 x float> [[TMP4]], ptr @dst32, align 4
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 3), align 4
-  %b0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 0), align 4
-  %b1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 1), align 4
-  %b2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 2), align 4
-  %b3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 3), align 4
-  %c0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 0), align 4
-  %c1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 1), align 4
-  %c2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 2), align 4
-  %c3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 3), align 4
+  %a0 = load float, ptr @srcA32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 3), align 4
+  %b0 = load float, ptr @srcB32, align 4
+  %b1 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 1), align 4
+  %b2 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 2), align 4
+  %b3 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 3), align 4
+  %c0 = load float, ptr @srcC32, align 4
+  %c1 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 1), align 4
+  %c2 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 2), align 4
+  %c3 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 3), align 4
   %fmuladd0 = call float @llvm.fmuladd.f32(float %a0, float %b0, float %c0)
   %fmuladd1 = call float @llvm.fmuladd.f32(float %a1, float %b1, float %c1)
   %fmuladd2 = call float @llvm.fmuladd.f32(float %a2, float %b2, float %c2)
   %fmuladd3 = call float @llvm.fmuladd.f32(float %a3, float %b3, float %c3)
-  store float %fmuladd0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-  store float %fmuladd1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %fmuladd2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-  store float %fmuladd3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %fmuladd0, ptr @dst32, align 4
+  store float %fmuladd1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %fmuladd2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+  store float %fmuladd3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @fmuladd_8f32() #0 {
 ; SSE-LABEL: @fmuladd_8f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcA32 to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcB32 to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcC32 to <4 x float>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @srcA32, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @srcB32, align 4
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr @srcC32, align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]])
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 4) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP6:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 4) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr @dst32, align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP6:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP8:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x float> [[TMP7]])
-; SSE-NEXT:    store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @fmuladd_8f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcA32 to <8 x float>*), align 4
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcB32 to <8 x float>*), align 4
-; AVX-NEXT:    [[TMP3:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcC32 to <8 x float>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @srcA32, align 4
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x float>, ptr @srcB32, align 4
+; AVX-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr @srcC32, align 4
 ; AVX-NEXT:    [[TMP4:%.*]] = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x float> [[TMP3]])
-; AVX-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX-NEXT:    store <8 x float> [[TMP4]], ptr @dst32, align 4
 ; AVX-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 3), align 4
-  %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 4), align 4
-  %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 5), align 4
-  %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 6), align 4
-  %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 7), align 4
-  %b0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 0), align 4
-  %b1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 1), align 4
-  %b2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 2), align 4
-  %b3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 3), align 4
-  %b4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 4), align 4
-  %b5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 5), align 4
-  %b6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 6), align 4
-  %b7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 7), align 4
-  %c0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 0), align 4
-  %c1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 1), align 4
-  %c2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 2), align 4
-  %c3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 3), align 4
-  %c4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 4), align 4
-  %c5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 5), align 4
-  %c6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 6), align 4
-  %c7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 7), align 4
+  %a0 = load float, ptr @srcA32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 3), align 4
+  %a4 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 4), align 4
+  %a5 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 5), align 4
+  %a6 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 6), align 4
+  %a7 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 7), align 4
+  %b0 = load float, ptr @srcB32, align 4
+  %b1 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 1), align 4
+  %b2 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 2), align 4
+  %b3 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 3), align 4
+  %b4 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 4), align 4
+  %b5 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 5), align 4
+  %b6 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 6), align 4
+  %b7 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 7), align 4
+  %c0 = load float, ptr @srcC32, align 4
+  %c1 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 1), align 4
+  %c2 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 2), align 4
+  %c3 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 3), align 4
+  %c4 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 4), align 4
+  %c5 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 5), align 4
+  %c6 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 6), align 4
+  %c7 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 7), align 4
   %fmuladd0 = call float @llvm.fmuladd.f32(float %a0, float %b0, float %c0)
   %fmuladd1 = call float @llvm.fmuladd.f32(float %a1, float %b1, float %c1)
   %fmuladd2 = call float @llvm.fmuladd.f32(float %a2, float %b2, float %c2)
@@ -265,110 +265,110 @@ define void @fmuladd_8f32() #0 {
   %fmuladd5 = call float @llvm.fmuladd.f32(float %a5, float %b5, float %c5)
   %fmuladd6 = call float @llvm.fmuladd.f32(float %a6, float %b6, float %c6)
   %fmuladd7 = call float @llvm.fmuladd.f32(float %a7, float %b7, float %c7)
-  store float %fmuladd0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-  store float %fmuladd1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %fmuladd2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-  store float %fmuladd3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-  store float %fmuladd4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
-  store float %fmuladd5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-  store float %fmuladd6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
-  store float %fmuladd7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  store float %fmuladd0, ptr @dst32, align 4
+  store float %fmuladd1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %fmuladd2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+  store float %fmuladd3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+  store float %fmuladd4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+  store float %fmuladd5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+  store float %fmuladd6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 4
+  store float %fmuladd7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @fmuladd_16f32() #0 {
 ; SSE-LABEL: @fmuladd_16f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcA32 to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcB32 to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcC32 to <4 x float>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @srcA32, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @srcB32, align 4
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr @srcC32, align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]])
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 4) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP6:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 4) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr @dst32, align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP6:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP8:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x float> [[TMP7]])
-; SSE-NEXT:    store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP9:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 8) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP10:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 8) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP11:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 8) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP9:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP10:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP11:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP12:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]])
-; SSE-NEXT:    store <4 x float> [[TMP12]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP13:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 12) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP14:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 12) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP15:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP12]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP13:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 12), align 4
+; SSE-NEXT:    [[TMP14:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 12), align 4
+; SSE-NEXT:    [[TMP15:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP16:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]])
-; SSE-NEXT:    store <4 x float> [[TMP16]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP16]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @fmuladd_16f32(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcA32 to <8 x float>*), align 4
-; AVX256-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcB32 to <8 x float>*), align 4
-; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcC32 to <8 x float>*), align 4
+; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @srcA32, align 4
+; AVX256-NEXT:    [[TMP2:%.*]] = load <8 x float>, ptr @srcB32, align 4
+; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr @srcC32, align 4
 ; AVX256-NEXT:    [[TMP4:%.*]] = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x float> [[TMP3]])
-; AVX256-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
-; AVX256-NEXT:    [[TMP5:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 8) to <8 x float>*), align 4
-; AVX256-NEXT:    [[TMP6:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 8) to <8 x float>*), align 4
-; AVX256-NEXT:    [[TMP7:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX256-NEXT:    store <8 x float> [[TMP4]], ptr @dst32, align 4
+; AVX256-NEXT:    [[TMP5:%.*]] = load <8 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 8), align 4
+; AVX256-NEXT:    [[TMP6:%.*]] = load <8 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 8), align 4
+; AVX256-NEXT:    [[TMP7:%.*]] = load <8 x float>, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 8), align 4
 ; AVX256-NEXT:    [[TMP8:%.*]] = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> [[TMP5]], <8 x float> [[TMP6]], <8 x float> [[TMP7]])
-; AVX256-NEXT:    store <8 x float> [[TMP8]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX256-NEXT:    store <8 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @fmuladd_16f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @srcA32 to <16 x float>*), align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @srcB32 to <16 x float>*), align 4
-; AVX512-NEXT:    [[TMP3:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @srcC32 to <16 x float>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, ptr @srcA32, align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x float>, ptr @srcB32, align 4
+; AVX512-NEXT:    [[TMP3:%.*]] = load <16 x float>, ptr @srcC32, align 4
 ; AVX512-NEXT:    [[TMP4:%.*]] = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> [[TMP1]], <16 x float> [[TMP2]], <16 x float> [[TMP3]])
-; AVX512-NEXT:    store <16 x float> [[TMP4]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 4
+; AVX512-NEXT:    store <16 x float> [[TMP4]], ptr @dst32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  0), align 4
-  %a1  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  1), align 4
-  %a2  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  2), align 4
-  %a3  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  3), align 4
-  %a4  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  4), align 4
-  %a5  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  5), align 4
-  %a6  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  6), align 4
-  %a7  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  7), align 4
-  %a8  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  8), align 4
-  %a9  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64  9), align 4
-  %a10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 10), align 4
-  %a11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 11), align 4
-  %a12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 12), align 4
-  %a13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 13), align 4
-  %a14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 14), align 4
-  %a15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 15), align 4
-  %b0  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  0), align 4
-  %b1  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  1), align 4
-  %b2  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  2), align 4
-  %b3  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  3), align 4
-  %b4  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  4), align 4
-  %b5  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  5), align 4
-  %b6  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  6), align 4
-  %b7  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  7), align 4
-  %b8  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  8), align 4
-  %b9  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64  9), align 4
-  %b10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 10), align 4
-  %b11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 11), align 4
-  %b12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 12), align 4
-  %b13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 13), align 4
-  %b14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 14), align 4
-  %b15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 15), align 4
-  %c0  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64  0), align 4
-  %c1  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64  1), align 4
-  %c2  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64  2), align 4
-  %c3  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64  3), align 4
-  %c4  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64  4), align 4
-  %c5  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64  5), align 4
-  %c6  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64  6), align 4
-  %c7  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64  7), align 4
-  %c8  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64  8), align 4
-  %c9  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64  9), align 4
-  %c10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 10), align 4
-  %c11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 11), align 4
-  %c12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 12), align 4
-  %c13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 13), align 4
-  %c14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 14), align 4
-  %c15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 15), align 4
+  %a0  = load float, ptr @srcA32, align 4
+  %a1  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  1), align 4
+  %a2  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  2), align 4
+  %a3  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  3), align 4
+  %a4  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  4), align 4
+  %a5  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  5), align 4
+  %a6  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  6), align 4
+  %a7  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  7), align 4
+  %a8  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  8), align 4
+  %a9  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64  9), align 4
+  %a10 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 10), align 4
+  %a11 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 11), align 4
+  %a12 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 12), align 4
+  %a13 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 13), align 4
+  %a14 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 14), align 4
+  %a15 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcA32, i32 0, i64 15), align 4
+  %b0  = load float, ptr @srcB32, align 4
+  %b1  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  1), align 4
+  %b2  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  2), align 4
+  %b3  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  3), align 4
+  %b4  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  4), align 4
+  %b5  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  5), align 4
+  %b6  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  6), align 4
+  %b7  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  7), align 4
+  %b8  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  8), align 4
+  %b9  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64  9), align 4
+  %b10 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 10), align 4
+  %b11 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 11), align 4
+  %b12 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 12), align 4
+  %b13 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 13), align 4
+  %b14 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 14), align 4
+  %b15 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcB32, i32 0, i64 15), align 4
+  %c0  = load float, ptr @srcC32, align 4
+  %c1  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64  1), align 4
+  %c2  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64  2), align 4
+  %c3  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64  3), align 4
+  %c4  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64  4), align 4
+  %c5  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64  5), align 4
+  %c6  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64  6), align 4
+  %c7  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64  7), align 4
+  %c8  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64  8), align 4
+  %c9  = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64  9), align 4
+  %c10 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 10), align 4
+  %c11 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 11), align 4
+  %c12 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 12), align 4
+  %c13 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 13), align 4
+  %c14 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 14), align 4
+  %c15 = load float, ptr getelementptr inbounds ([16 x float], ptr @srcC32, i32 0, i64 15), align 4
   %fmuladd0  = call float @llvm.fmuladd.f32(float %a0 , float %b0 , float %c0 )
   %fmuladd1  = call float @llvm.fmuladd.f32(float %a1 , float %b1 , float %c1 )
   %fmuladd2  = call float @llvm.fmuladd.f32(float %a2 , float %b2 , float %c2 )
@@ -385,22 +385,22 @@ define void @fmuladd_16f32() #0 {
   %fmuladd13 = call float @llvm.fmuladd.f32(float %a13, float %b13, float %c13)
   %fmuladd14 = call float @llvm.fmuladd.f32(float %a14, float %b14, float %c14)
   %fmuladd15 = call float @llvm.fmuladd.f32(float %a15, float %b15, float %c15)
-  store float %fmuladd0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  0), align 4
-  store float %fmuladd1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  1), align 4
-  store float %fmuladd2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  2), align 4
-  store float %fmuladd3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  3), align 4
-  store float %fmuladd4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  4), align 4
-  store float %fmuladd5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  5), align 4
-  store float %fmuladd6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  6), align 4
-  store float %fmuladd7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  7), align 4
-  store float %fmuladd8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  8), align 4
-  store float %fmuladd9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  9), align 4
-  store float %fmuladd10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
-  store float %fmuladd11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-  store float %fmuladd12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
-  store float %fmuladd13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-  store float %fmuladd14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
-  store float %fmuladd15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+  store float %fmuladd0 , ptr @dst32, align 4
+  store float %fmuladd1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  1), align 4
+  store float %fmuladd2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  2), align 4
+  store float %fmuladd3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  3), align 4
+  store float %fmuladd4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  4), align 4
+  store float %fmuladd5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  5), align 4
+  store float %fmuladd6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  6), align 4
+  store float %fmuladd7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  7), align 4
+  store float %fmuladd8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  8), align 4
+  store float %fmuladd9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  9), align 4
+  store float %fmuladd10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 4
+  store float %fmuladd11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+  store float %fmuladd12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
+  store float %fmuladd13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+  store float %fmuladd14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 4
+  store float %fmuladd15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
   ret void
 }
 

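For reference, the pattern applied throughout these hunks, as a minimal sketch (the global names mirror the test files above; the lines are illustrative, not quoted from any single hunk): with typed pointers a scalar element access reads

  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8

With opaque pointers the pointer operand becomes an untyped ptr while the GEP keeps its source element type:

  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8

All-zero-index GEP constant expressions and pointer-to-pointer bitcasts no longer change the type, so they constant-fold away; the element-0 scalar loads and the vector loads in the CHECK lines both collapse to a direct load of the global:

  %a0 = load double, ptr @src64, align 8
  %tmp = load <8 x double>, ptr @src64, align 8
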
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fptosi-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/fptosi-inseltpoison.ll
index ee757d3f5c686..9223a77bc5263 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/fptosi-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/fptosi-inseltpoison.ll
@@ -21,14 +21,14 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 define void @fptosi_8f64_8i64() #0 {
 ; SSE-LABEL: @fptosi_8f64_8i64(
-; SSE-NEXT:    [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-; SSE-NEXT:    [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-; SSE-NEXT:    [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-; SSE-NEXT:    [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-; SSE-NEXT:    [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-; SSE-NEXT:    [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-; SSE-NEXT:    [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-; SSE-NEXT:    [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+; SSE-NEXT:    [[A0:%.*]] = load double, ptr @src64, align 8
+; SSE-NEXT:    [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+; SSE-NEXT:    [[A4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[A5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+; SSE-NEXT:    [[A6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[A7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
 ; SSE-NEXT:    [[CVT0:%.*]] = fptosi double [[A0]] to i64
 ; SSE-NEXT:    [[CVT1:%.*]] = fptosi double [[A1]] to i64
 ; SSE-NEXT:    [[CVT2:%.*]] = fptosi double [[A2]] to i64
@@ -37,25 +37,25 @@ define void @fptosi_8f64_8i64() #0 {
 ; SSE-NEXT:    [[CVT5:%.*]] = fptosi double [[A5]] to i64
 ; SSE-NEXT:    [[CVT6:%.*]] = fptosi double [[A6]] to i64
 ; SSE-NEXT:    [[CVT7:%.*]] = fptosi double [[A7]] to i64
-; SSE-NEXT:    store i64 [[CVT0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
-; SSE-NEXT:    store i64 [[CVT1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
-; SSE-NEXT:    store i64 [[CVT2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
-; SSE-NEXT:    store i64 [[CVT3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
-; SSE-NEXT:    store i64 [[CVT4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
-; SSE-NEXT:    store i64 [[CVT5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
-; SSE-NEXT:    store i64 [[CVT6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
-; SSE-NEXT:    store i64 [[CVT7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
+; SSE-NEXT:    store i64 [[CVT0]], ptr @dst64, align 8
+; SSE-NEXT:    store i64 [[CVT1]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 1), align 8
+; SSE-NEXT:    store i64 [[CVT2]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 2), align 8
+; SSE-NEXT:    store i64 [[CVT3]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 3), align 8
+; SSE-NEXT:    store i64 [[CVT4]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
+; SSE-NEXT:    store i64 [[CVT5]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 5), align 8
+; SSE-NEXT:    store i64 [[CVT6]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 6), align 8
+; SSE-NEXT:    store i64 [[CVT7]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 7), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX256NODQ-LABEL: @fptosi_8f64_8i64(
-; AVX256NODQ-NEXT:    [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-; AVX256NODQ-NEXT:    [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-; AVX256NODQ-NEXT:    [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-; AVX256NODQ-NEXT:    [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-; AVX256NODQ-NEXT:    [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-; AVX256NODQ-NEXT:    [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-; AVX256NODQ-NEXT:    [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-; AVX256NODQ-NEXT:    [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+; AVX256NODQ-NEXT:    [[A0:%.*]] = load double, ptr @src64, align 8
+; AVX256NODQ-NEXT:    [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+; AVX256NODQ-NEXT:    [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+; AVX256NODQ-NEXT:    [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+; AVX256NODQ-NEXT:    [[A4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+; AVX256NODQ-NEXT:    [[A5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+; AVX256NODQ-NEXT:    [[A6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+; AVX256NODQ-NEXT:    [[A7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
 ; AVX256NODQ-NEXT:    [[CVT0:%.*]] = fptosi double [[A0]] to i64
 ; AVX256NODQ-NEXT:    [[CVT1:%.*]] = fptosi double [[A1]] to i64
 ; AVX256NODQ-NEXT:    [[CVT2:%.*]] = fptosi double [[A2]] to i64
@@ -64,39 +64,39 @@ define void @fptosi_8f64_8i64() #0 {
 ; AVX256NODQ-NEXT:    [[CVT5:%.*]] = fptosi double [[A5]] to i64
 ; AVX256NODQ-NEXT:    [[CVT6:%.*]] = fptosi double [[A6]] to i64
 ; AVX256NODQ-NEXT:    [[CVT7:%.*]] = fptosi double [[A7]] to i64
-; AVX256NODQ-NEXT:    store i64 [[CVT0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT0]], ptr @dst64, align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT1]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 1), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT2]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 2), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT3]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 3), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT4]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT5]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 5), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT6]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 6), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT7]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 7), align 8
 ; AVX256NODQ-NEXT:    ret void
 ;
 ; AVX512-LABEL: @fptosi_8f64_8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @src64, align 8
 ; AVX512-NEXT:    [[TMP2:%.*]] = fptosi <8 x double> [[TMP1]] to <8 x i64>
-; AVX512-NEXT:    store <8 x i64> [[TMP2]], <8 x i64>* bitcast ([8 x i64]* @dst64 to <8 x i64>*), align 8
+; AVX512-NEXT:    store <8 x i64> [[TMP2]], ptr @dst64, align 8
 ; AVX512-NEXT:    ret void
 ;
 ; AVX256DQ-LABEL: @fptosi_8f64_8i64(
-; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
 ; AVX256DQ-NEXT:    [[TMP2:%.*]] = fptosi <4 x double> [[TMP1]] to <4 x i64>
-; AVX256DQ-NEXT:    store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([8 x i64]* @dst64 to <4 x i64>*), align 8
-; AVX256DQ-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX256DQ-NEXT:    store <4 x i64> [[TMP2]], ptr @dst64, align 8
+; AVX256DQ-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
 ; AVX256DQ-NEXT:    [[TMP4:%.*]] = fptosi <4 x double> [[TMP3]] to <4 x i64>
-; AVX256DQ-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX256DQ-NEXT:    store <4 x i64> [[TMP4]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
 ; AVX256DQ-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-  %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-  %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-  %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-  %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+  %a0 = load double, ptr @src64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
   %cvt0 = fptosi double %a0 to i64
   %cvt1 = fptosi double %a1 to i64
   %cvt2 = fptosi double %a2 to i64
@@ -105,41 +105,41 @@ define void @fptosi_8f64_8i64() #0 {
   %cvt5 = fptosi double %a5 to i64
   %cvt6 = fptosi double %a6 to i64
   %cvt7 = fptosi double %a7 to i64
-  store i64 %cvt0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
-  store i64 %cvt1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
-  store i64 %cvt2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
-  store i64 %cvt3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
-  store i64 %cvt4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
-  store i64 %cvt5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
-  store i64 %cvt6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
-  store i64 %cvt7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
+  store i64 %cvt0, ptr @dst64, align 8
+  store i64 %cvt1, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 1), align 8
+  store i64 %cvt2, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 2), align 8
+  store i64 %cvt3, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 3), align 8
+  store i64 %cvt4, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
+  store i64 %cvt5, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 5), align 8
+  store i64 %cvt6, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 6), align 8
+  store i64 %cvt7, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @fptosi_8f64_8i32() #0 {
 ; SSE-LABEL: @fptosi_8f64_8i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
 ; SSE-NEXT:    [[TMP2:%.*]] = fptosi <4 x double> [[TMP1]] to <4 x i32>
-; SSE-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([16 x i32]* @dst32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; SSE-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 4
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
 ; SSE-NEXT:    [[TMP4:%.*]] = fptosi <4 x double> [[TMP3]] to <4 x i32>
-; SSE-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 4), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @fptosi_8f64_8i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @src64, align 8
 ; AVX-NEXT:    [[TMP2:%.*]] = fptosi <8 x double> [[TMP1]] to <8 x i32>
-; AVX-NEXT:    store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([16 x i32]* @dst32 to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP2]], ptr @dst32, align 4
 ; AVX-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-  %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-  %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-  %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-  %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+  %a0 = load double, ptr @src64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
   %cvt0 = fptosi double %a0 to i32
   %cvt1 = fptosi double %a1 to i32
   %cvt2 = fptosi double %a2 to i32
@@ -148,32 +148,32 @@ define void @fptosi_8f64_8i32() #0 {
   %cvt5 = fptosi double %a5 to i32
   %cvt6 = fptosi double %a6 to i32
   %cvt7 = fptosi double %a7 to i32
-  store i32 %cvt0, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 0), align 4
-  store i32 %cvt1, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 1), align 4
-  store i32 %cvt2, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 2), align 4
-  store i32 %cvt3, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 3), align 4
-  store i32 %cvt4, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4), align 4
-  store i32 %cvt5, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 5), align 4
-  store i32 %cvt6, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 6), align 4
-  store i32 %cvt7, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 7), align 4
+  store i32 %cvt0, ptr @dst32, align 4
+  store i32 %cvt1, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 1), align 4
+  store i32 %cvt2, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 2), align 4
+  store i32 %cvt3, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 3), align 4
+  store i32 %cvt4, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 4), align 4
+  store i32 %cvt5, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 5), align 4
+  store i32 %cvt6, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 6), align 4
+  store i32 %cvt7, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @fptosi_8f64_8i16() #0 {
 ; CHECK-LABEL: @fptosi_8f64_8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @src64, align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = fptosi <8 x double> [[TMP1]] to <8 x i16>
-; CHECK-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-  %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-  %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-  %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-  %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+  %a0 = load double, ptr @src64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
   %cvt0 = fptosi double %a0 to i16
   %cvt1 = fptosi double %a1 to i16
   %cvt2 = fptosi double %a2 to i16
@@ -182,32 +182,32 @@ define void @fptosi_8f64_8i16() #0 {
   %cvt5 = fptosi double %a5 to i16
   %cvt6 = fptosi double %a6 to i16
   %cvt7 = fptosi double %a7 to i16
-  store i16 %cvt0, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 0), align 2
-  store i16 %cvt1, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 1), align 2
-  store i16 %cvt2, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 2), align 2
-  store i16 %cvt3, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 3), align 2
-  store i16 %cvt4, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 4), align 2
-  store i16 %cvt5, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 5), align 2
-  store i16 %cvt6, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 6), align 2
-  store i16 %cvt7, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 7), align 2
+  store i16 %cvt0, ptr @dst16, align 2
+  store i16 %cvt1, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 1), align 2
+  store i16 %cvt2, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 2), align 2
+  store i16 %cvt3, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 3), align 2
+  store i16 %cvt4, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 4), align 2
+  store i16 %cvt5, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 5), align 2
+  store i16 %cvt6, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 6), align 2
+  store i16 %cvt7, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 7), align 2
   ret void
 }
 
 define void @fptosi_8f64_8i8() #0 {
 ; CHECK-LABEL: @fptosi_8f64_8i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @src64, align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = fptosi <8 x double> [[TMP1]] to <8 x i8>
-; CHECK-NEXT:    store <8 x i8> [[TMP2]], <8 x i8>* bitcast ([64 x i8]* @dst8 to <8 x i8>*), align 1
+; CHECK-NEXT:    store <8 x i8> [[TMP2]], ptr @dst8, align 1
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-  %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-  %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-  %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-  %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+  %a0 = load double, ptr @src64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
   %cvt0 = fptosi double %a0 to i8
   %cvt1 = fptosi double %a1 to i8
   %cvt2 = fptosi double %a2 to i8
@@ -216,14 +216,14 @@ define void @fptosi_8f64_8i8() #0 {
   %cvt5 = fptosi double %a5 to i8
   %cvt6 = fptosi double %a6 to i8
   %cvt7 = fptosi double %a7 to i8
-  store i8 %cvt0, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 0), align 1
-  store i8 %cvt1, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 1), align 1
-  store i8 %cvt2, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 2), align 1
-  store i8 %cvt3, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 3), align 1
-  store i8 %cvt4, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 4), align 1
-  store i8 %cvt5, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 5), align 1
-  store i8 %cvt6, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 6), align 1
-  store i8 %cvt7, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 7), align 1
+  store i8 %cvt0, ptr @dst8, align 1
+  store i8 %cvt1, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 1), align 1
+  store i8 %cvt2, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 2), align 1
+  store i8 %cvt3, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 3), align 1
+  store i8 %cvt4, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 4), align 1
+  store i8 %cvt5, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 5), align 1
+  store i8 %cvt6, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 6), align 1
+  store i8 %cvt7, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 7), align 1
   ret void
 }
 
@@ -233,14 +233,14 @@ define void @fptosi_8f64_8i8() #0 {
 
 define void @fptosi_8f32_8i64() #0 {
 ; SSE-LABEL: @fptosi_8f32_8i64(
-; SSE-NEXT:    [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-; SSE-NEXT:    [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-; SSE-NEXT:    [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-; SSE-NEXT:    [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-; SSE-NEXT:    [[A4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-; SSE-NEXT:    [[A5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-; SSE-NEXT:    [[A6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-; SSE-NEXT:    [[A7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+; SSE-NEXT:    [[A0:%.*]] = load float, ptr @src32, align 4
+; SSE-NEXT:    [[A1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+; SSE-NEXT:    [[A2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+; SSE-NEXT:    [[A3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+; SSE-NEXT:    [[A4:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[A5:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+; SSE-NEXT:    [[A6:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+; SSE-NEXT:    [[A7:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
 ; SSE-NEXT:    [[CVT0:%.*]] = fptosi float [[A0]] to i64
 ; SSE-NEXT:    [[CVT1:%.*]] = fptosi float [[A1]] to i64
 ; SSE-NEXT:    [[CVT2:%.*]] = fptosi float [[A2]] to i64
@@ -249,25 +249,25 @@ define void @fptosi_8f32_8i64() #0 {
 ; SSE-NEXT:    [[CVT5:%.*]] = fptosi float [[A5]] to i64
 ; SSE-NEXT:    [[CVT6:%.*]] = fptosi float [[A6]] to i64
 ; SSE-NEXT:    [[CVT7:%.*]] = fptosi float [[A7]] to i64
-; SSE-NEXT:    store i64 [[CVT0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
-; SSE-NEXT:    store i64 [[CVT1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
-; SSE-NEXT:    store i64 [[CVT2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
-; SSE-NEXT:    store i64 [[CVT3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
-; SSE-NEXT:    store i64 [[CVT4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
-; SSE-NEXT:    store i64 [[CVT5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
-; SSE-NEXT:    store i64 [[CVT6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
-; SSE-NEXT:    store i64 [[CVT7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
+; SSE-NEXT:    store i64 [[CVT0]], ptr @dst64, align 8
+; SSE-NEXT:    store i64 [[CVT1]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 1), align 8
+; SSE-NEXT:    store i64 [[CVT2]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 2), align 8
+; SSE-NEXT:    store i64 [[CVT3]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 3), align 8
+; SSE-NEXT:    store i64 [[CVT4]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
+; SSE-NEXT:    store i64 [[CVT5]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 5), align 8
+; SSE-NEXT:    store i64 [[CVT6]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 6), align 8
+; SSE-NEXT:    store i64 [[CVT7]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 7), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX256NODQ-LABEL: @fptosi_8f32_8i64(
-; AVX256NODQ-NEXT:    [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-; AVX256NODQ-NEXT:    [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-; AVX256NODQ-NEXT:    [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-; AVX256NODQ-NEXT:    [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-; AVX256NODQ-NEXT:    [[A4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-; AVX256NODQ-NEXT:    [[A5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-; AVX256NODQ-NEXT:    [[A6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-; AVX256NODQ-NEXT:    [[A7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+; AVX256NODQ-NEXT:    [[A0:%.*]] = load float, ptr @src32, align 4
+; AVX256NODQ-NEXT:    [[A1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+; AVX256NODQ-NEXT:    [[A2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+; AVX256NODQ-NEXT:    [[A3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+; AVX256NODQ-NEXT:    [[A4:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+; AVX256NODQ-NEXT:    [[A5:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+; AVX256NODQ-NEXT:    [[A6:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+; AVX256NODQ-NEXT:    [[A7:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
 ; AVX256NODQ-NEXT:    [[CVT0:%.*]] = fptosi float [[A0]] to i64
 ; AVX256NODQ-NEXT:    [[CVT1:%.*]] = fptosi float [[A1]] to i64
 ; AVX256NODQ-NEXT:    [[CVT2:%.*]] = fptosi float [[A2]] to i64
@@ -276,39 +276,39 @@ define void @fptosi_8f32_8i64() #0 {
 ; AVX256NODQ-NEXT:    [[CVT5:%.*]] = fptosi float [[A5]] to i64
 ; AVX256NODQ-NEXT:    [[CVT6:%.*]] = fptosi float [[A6]] to i64
 ; AVX256NODQ-NEXT:    [[CVT7:%.*]] = fptosi float [[A7]] to i64
-; AVX256NODQ-NEXT:    store i64 [[CVT0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT0]], ptr @dst64, align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT1]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 1), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT2]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 2), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT3]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 3), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT4]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT5]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 5), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT6]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 6), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT7]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 7), align 8
 ; AVX256NODQ-NEXT:    ret void
 ;
 ; AVX512-LABEL: @fptosi_8f32_8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX512-NEXT:    [[TMP2:%.*]] = fptosi <8 x float> [[TMP1]] to <8 x i64>
-; AVX512-NEXT:    store <8 x i64> [[TMP2]], <8 x i64>* bitcast ([8 x i64]* @dst64 to <8 x i64>*), align 8
+; AVX512-NEXT:    store <8 x i64> [[TMP2]], ptr @dst64, align 8
 ; AVX512-NEXT:    ret void
 ;
 ; AVX256DQ-LABEL: @fptosi_8f32_8i64(
-; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; AVX256DQ-NEXT:    [[TMP2:%.*]] = fptosi <4 x float> [[TMP1]] to <4 x i64>
-; AVX256DQ-NEXT:    store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([8 x i64]* @dst64 to <4 x i64>*), align 8
-; AVX256DQ-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; AVX256DQ-NEXT:    store <4 x i64> [[TMP2]], ptr @dst64, align 8
+; AVX256DQ-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
 ; AVX256DQ-NEXT:    [[TMP4:%.*]] = fptosi <4 x float> [[TMP3]] to <4 x i64>
-; AVX256DQ-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX256DQ-NEXT:    store <4 x i64> [[TMP4]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
 ; AVX256DQ-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-  %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-  %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-  %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-  %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+  %a0 = load float, ptr @src32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+  %a4 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+  %a5 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+  %a6 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+  %a7 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
   %cvt0 = fptosi float %a0 to i64
   %cvt1 = fptosi float %a1 to i64
   %cvt2 = fptosi float %a2 to i64
@@ -317,41 +317,41 @@ define void @fptosi_8f32_8i64() #0 {
   %cvt5 = fptosi float %a5 to i64
   %cvt6 = fptosi float %a6 to i64
   %cvt7 = fptosi float %a7 to i64
-  store i64 %cvt0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
-  store i64 %cvt1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
-  store i64 %cvt2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
-  store i64 %cvt3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
-  store i64 %cvt4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
-  store i64 %cvt5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
-  store i64 %cvt6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
-  store i64 %cvt7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
+  store i64 %cvt0, ptr @dst64, align 8
+  store i64 %cvt1, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 1), align 8
+  store i64 %cvt2, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 2), align 8
+  store i64 %cvt3, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 3), align 8
+  store i64 %cvt4, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
+  store i64 %cvt5, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 5), align 8
+  store i64 %cvt6, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 6), align 8
+  store i64 %cvt7, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @fptosi_8f32_8i32() #0 {
 ; SSE-LABEL: @fptosi_8f32_8i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; SSE-NEXT:    [[TMP2:%.*]] = fptosi <4 x float> [[TMP1]] to <4 x i32>
-; SSE-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([16 x i32]* @dst32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 4
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = fptosi <4 x float> [[TMP3]] to <4 x i32>
-; SSE-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 4), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @fptosi_8f32_8i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = fptosi <8 x float> [[TMP1]] to <8 x i32>
-; AVX-NEXT:    store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([16 x i32]* @dst32 to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP2]], ptr @dst32, align 4
 ; AVX-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-  %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-  %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-  %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-  %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+  %a0 = load float, ptr @src32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+  %a4 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+  %a5 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+  %a6 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+  %a7 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
   %cvt0 = fptosi float %a0 to i32
   %cvt1 = fptosi float %a1 to i32
   %cvt2 = fptosi float %a2 to i32
@@ -360,32 +360,32 @@ define void @fptosi_8f32_8i32() #0 {
   %cvt5 = fptosi float %a5 to i32
   %cvt6 = fptosi float %a6 to i32
   %cvt7 = fptosi float %a7 to i32
-  store i32 %cvt0, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 0), align 4
-  store i32 %cvt1, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 1), align 4
-  store i32 %cvt2, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 2), align 4
-  store i32 %cvt3, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 3), align 4
-  store i32 %cvt4, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4), align 4
-  store i32 %cvt5, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 5), align 4
-  store i32 %cvt6, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 6), align 4
-  store i32 %cvt7, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 7), align 4
+  store i32 %cvt0, ptr @dst32, align 4
+  store i32 %cvt1, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 1), align 4
+  store i32 %cvt2, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 2), align 4
+  store i32 %cvt3, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 3), align 4
+  store i32 %cvt4, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 4), align 4
+  store i32 %cvt5, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 5), align 4
+  store i32 %cvt6, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 6), align 4
+  store i32 %cvt7, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @fptosi_8f32_8i16() #0 {
 ; CHECK-LABEL: @fptosi_8f32_8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fptosi <8 x float> [[TMP1]] to <8 x i16>
-; CHECK-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-  %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-  %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-  %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-  %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+  %a0 = load float, ptr @src32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+  %a4 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+  %a5 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+  %a6 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+  %a7 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
   %cvt0 = fptosi float %a0 to i16
   %cvt1 = fptosi float %a1 to i16
   %cvt2 = fptosi float %a2 to i16
@@ -394,32 +394,32 @@ define void @fptosi_8f32_8i16() #0 {
   %cvt5 = fptosi float %a5 to i16
   %cvt6 = fptosi float %a6 to i16
   %cvt7 = fptosi float %a7 to i16
-  store i16 %cvt0, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 0), align 2
-  store i16 %cvt1, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 1), align 2
-  store i16 %cvt2, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 2), align 2
-  store i16 %cvt3, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 3), align 2
-  store i16 %cvt4, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 4), align 2
-  store i16 %cvt5, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 5), align 2
-  store i16 %cvt6, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 6), align 2
-  store i16 %cvt7, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 7), align 2
+  store i16 %cvt0, ptr @dst16, align 2
+  store i16 %cvt1, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 1), align 2
+  store i16 %cvt2, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 2), align 2
+  store i16 %cvt3, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 3), align 2
+  store i16 %cvt4, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 4), align 2
+  store i16 %cvt5, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 5), align 2
+  store i16 %cvt6, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 6), align 2
+  store i16 %cvt7, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 7), align 2
   ret void
 }
 
 define void @fptosi_8f32_8i8() #0 {
 ; CHECK-LABEL: @fptosi_8f32_8i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fptosi <8 x float> [[TMP1]] to <8 x i8>
-; CHECK-NEXT:    store <8 x i8> [[TMP2]], <8 x i8>* bitcast ([64 x i8]* @dst8 to <8 x i8>*), align 1
+; CHECK-NEXT:    store <8 x i8> [[TMP2]], ptr @dst8, align 1
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-  %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-  %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-  %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-  %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+  %a0 = load float, ptr @src32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+  %a4 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+  %a5 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+  %a6 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+  %a7 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
   %cvt0 = fptosi float %a0 to i8
   %cvt1 = fptosi float %a1 to i8
   %cvt2 = fptosi float %a2 to i8
@@ -428,14 +428,14 @@ define void @fptosi_8f32_8i8() #0 {
   %cvt5 = fptosi float %a5 to i8
   %cvt6 = fptosi float %a6 to i8
   %cvt7 = fptosi float %a7 to i8
-  store i8 %cvt0, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 0), align 1
-  store i8 %cvt1, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 1), align 1
-  store i8 %cvt2, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 2), align 1
-  store i8 %cvt3, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 3), align 1
-  store i8 %cvt4, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 4), align 1
-  store i8 %cvt5, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 5), align 1
-  store i8 %cvt6, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 6), align 1
-  store i8 %cvt7, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 7), align 1
+  store i8 %cvt0, ptr @dst8, align 1
+  store i8 %cvt1, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 1), align 1
+  store i8 %cvt2, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 2), align 1
+  store i8 %cvt3, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 3), align 1
+  store i8 %cvt4, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 4), align 1
+  store i8 %cvt5, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 5), align 1
+  store i8 %cvt6, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 6), align 1
+  store i8 %cvt7, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 7), align 1
   ret void
 }
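
The conversions above all follow the same mechanical pattern: with opaque pointers every pointer value has the single type ptr, so the source-element bitcasts that typed-pointer IR needed become no-ops and are simply dropped, and a getelementptr whose indices are all zero folds away to the bare global. A minimal before/after sketch, using a hypothetical global @g purely for illustration (not taken from the tests below):

  ; typed pointers (old form)
  %v = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @g to <4 x i32>*), align 4
  store i32 %x, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @g, i32 0, i64 0), align 4

  ; opaque pointers (new form)
  %v = load <4 x i32>, ptr @g, align 4
  store i32 %x, ptr @g, align 4

Accesses at nonzero offsets keep their getelementptr constant expressions, only with ptr substituted for the typed pointer operand, as seen throughout the stores to @dst64, @dst32, @dst16, and @dst8 above.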
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fptosi.ll b/llvm/test/Transforms/SLPVectorizer/X86/fptosi.ll
index 724806c5d6dd3..7959e64e5e141 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/fptosi.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/fptosi.ll
@@ -21,14 +21,14 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 define void @fptosi_8f64_8i64() #0 {
 ; SSE-LABEL: @fptosi_8f64_8i64(
-; SSE-NEXT:    [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-; SSE-NEXT:    [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-; SSE-NEXT:    [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-; SSE-NEXT:    [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-; SSE-NEXT:    [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-; SSE-NEXT:    [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-; SSE-NEXT:    [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-; SSE-NEXT:    [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+; SSE-NEXT:    [[A0:%.*]] = load double, ptr @src64, align 8
+; SSE-NEXT:    [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+; SSE-NEXT:    [[A4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[A5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+; SSE-NEXT:    [[A6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[A7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
 ; SSE-NEXT:    [[CVT0:%.*]] = fptosi double [[A0]] to i64
 ; SSE-NEXT:    [[CVT1:%.*]] = fptosi double [[A1]] to i64
 ; SSE-NEXT:    [[CVT2:%.*]] = fptosi double [[A2]] to i64
@@ -37,25 +37,25 @@ define void @fptosi_8f64_8i64() #0 {
 ; SSE-NEXT:    [[CVT5:%.*]] = fptosi double [[A5]] to i64
 ; SSE-NEXT:    [[CVT6:%.*]] = fptosi double [[A6]] to i64
 ; SSE-NEXT:    [[CVT7:%.*]] = fptosi double [[A7]] to i64
-; SSE-NEXT:    store i64 [[CVT0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
-; SSE-NEXT:    store i64 [[CVT1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
-; SSE-NEXT:    store i64 [[CVT2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
-; SSE-NEXT:    store i64 [[CVT3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
-; SSE-NEXT:    store i64 [[CVT4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
-; SSE-NEXT:    store i64 [[CVT5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
-; SSE-NEXT:    store i64 [[CVT6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
-; SSE-NEXT:    store i64 [[CVT7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
+; SSE-NEXT:    store i64 [[CVT0]], ptr @dst64, align 8
+; SSE-NEXT:    store i64 [[CVT1]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 1), align 8
+; SSE-NEXT:    store i64 [[CVT2]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 2), align 8
+; SSE-NEXT:    store i64 [[CVT3]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 3), align 8
+; SSE-NEXT:    store i64 [[CVT4]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
+; SSE-NEXT:    store i64 [[CVT5]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 5), align 8
+; SSE-NEXT:    store i64 [[CVT6]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 6), align 8
+; SSE-NEXT:    store i64 [[CVT7]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 7), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX256NODQ-LABEL: @fptosi_8f64_8i64(
-; AVX256NODQ-NEXT:    [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-; AVX256NODQ-NEXT:    [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-; AVX256NODQ-NEXT:    [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-; AVX256NODQ-NEXT:    [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-; AVX256NODQ-NEXT:    [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-; AVX256NODQ-NEXT:    [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-; AVX256NODQ-NEXT:    [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-; AVX256NODQ-NEXT:    [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+; AVX256NODQ-NEXT:    [[A0:%.*]] = load double, ptr @src64, align 8
+; AVX256NODQ-NEXT:    [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+; AVX256NODQ-NEXT:    [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+; AVX256NODQ-NEXT:    [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+; AVX256NODQ-NEXT:    [[A4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+; AVX256NODQ-NEXT:    [[A5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+; AVX256NODQ-NEXT:    [[A6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+; AVX256NODQ-NEXT:    [[A7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
 ; AVX256NODQ-NEXT:    [[CVT0:%.*]] = fptosi double [[A0]] to i64
 ; AVX256NODQ-NEXT:    [[CVT1:%.*]] = fptosi double [[A1]] to i64
 ; AVX256NODQ-NEXT:    [[CVT2:%.*]] = fptosi double [[A2]] to i64
@@ -64,39 +64,39 @@ define void @fptosi_8f64_8i64() #0 {
 ; AVX256NODQ-NEXT:    [[CVT5:%.*]] = fptosi double [[A5]] to i64
 ; AVX256NODQ-NEXT:    [[CVT6:%.*]] = fptosi double [[A6]] to i64
 ; AVX256NODQ-NEXT:    [[CVT7:%.*]] = fptosi double [[A7]] to i64
-; AVX256NODQ-NEXT:    store i64 [[CVT0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT0]], ptr @dst64, align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT1]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 1), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT2]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 2), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT3]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 3), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT4]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT5]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 5), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT6]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 6), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT7]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 7), align 8
 ; AVX256NODQ-NEXT:    ret void
 ;
 ; AVX512-LABEL: @fptosi_8f64_8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @src64, align 8
 ; AVX512-NEXT:    [[TMP2:%.*]] = fptosi <8 x double> [[TMP1]] to <8 x i64>
-; AVX512-NEXT:    store <8 x i64> [[TMP2]], <8 x i64>* bitcast ([8 x i64]* @dst64 to <8 x i64>*), align 8
+; AVX512-NEXT:    store <8 x i64> [[TMP2]], ptr @dst64, align 8
 ; AVX512-NEXT:    ret void
 ;
 ; AVX256DQ-LABEL: @fptosi_8f64_8i64(
-; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
 ; AVX256DQ-NEXT:    [[TMP2:%.*]] = fptosi <4 x double> [[TMP1]] to <4 x i64>
-; AVX256DQ-NEXT:    store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([8 x i64]* @dst64 to <4 x i64>*), align 8
-; AVX256DQ-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX256DQ-NEXT:    store <4 x i64> [[TMP2]], ptr @dst64, align 8
+; AVX256DQ-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
 ; AVX256DQ-NEXT:    [[TMP4:%.*]] = fptosi <4 x double> [[TMP3]] to <4 x i64>
-; AVX256DQ-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX256DQ-NEXT:    store <4 x i64> [[TMP4]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
 ; AVX256DQ-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-  %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-  %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-  %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-  %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+  %a0 = load double, ptr @src64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
   %cvt0 = fptosi double %a0 to i64
   %cvt1 = fptosi double %a1 to i64
   %cvt2 = fptosi double %a2 to i64
@@ -105,41 +105,41 @@ define void @fptosi_8f64_8i64() #0 {
   %cvt5 = fptosi double %a5 to i64
   %cvt6 = fptosi double %a6 to i64
   %cvt7 = fptosi double %a7 to i64
-  store i64 %cvt0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
-  store i64 %cvt1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
-  store i64 %cvt2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
-  store i64 %cvt3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
-  store i64 %cvt4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
-  store i64 %cvt5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
-  store i64 %cvt6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
-  store i64 %cvt7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
+  store i64 %cvt0, ptr @dst64, align 8
+  store i64 %cvt1, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 1), align 8
+  store i64 %cvt2, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 2), align 8
+  store i64 %cvt3, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 3), align 8
+  store i64 %cvt4, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
+  store i64 %cvt5, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 5), align 8
+  store i64 %cvt6, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 6), align 8
+  store i64 %cvt7, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @fptosi_8f64_8i32() #0 {
 ; SSE-LABEL: @fptosi_8f64_8i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
 ; SSE-NEXT:    [[TMP2:%.*]] = fptosi <4 x double> [[TMP1]] to <4 x i32>
-; SSE-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([16 x i32]* @dst32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; SSE-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 4
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
 ; SSE-NEXT:    [[TMP4:%.*]] = fptosi <4 x double> [[TMP3]] to <4 x i32>
-; SSE-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 4), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @fptosi_8f64_8i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @src64, align 8
 ; AVX-NEXT:    [[TMP2:%.*]] = fptosi <8 x double> [[TMP1]] to <8 x i32>
-; AVX-NEXT:    store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([16 x i32]* @dst32 to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP2]], ptr @dst32, align 4
 ; AVX-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-  %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-  %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-  %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-  %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+  %a0 = load double, ptr @src64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
   %cvt0 = fptosi double %a0 to i32
   %cvt1 = fptosi double %a1 to i32
   %cvt2 = fptosi double %a2 to i32
@@ -148,32 +148,32 @@ define void @fptosi_8f64_8i32() #0 {
   %cvt5 = fptosi double %a5 to i32
   %cvt6 = fptosi double %a6 to i32
   %cvt7 = fptosi double %a7 to i32
-  store i32 %cvt0, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 0), align 4
-  store i32 %cvt1, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 1), align 4
-  store i32 %cvt2, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 2), align 4
-  store i32 %cvt3, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 3), align 4
-  store i32 %cvt4, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4), align 4
-  store i32 %cvt5, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 5), align 4
-  store i32 %cvt6, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 6), align 4
-  store i32 %cvt7, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 7), align 4
+  store i32 %cvt0, ptr @dst32, align 4
+  store i32 %cvt1, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 1), align 4
+  store i32 %cvt2, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 2), align 4
+  store i32 %cvt3, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 3), align 4
+  store i32 %cvt4, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 4), align 4
+  store i32 %cvt5, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 5), align 4
+  store i32 %cvt6, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 6), align 4
+  store i32 %cvt7, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @fptosi_8f64_8i16() #0 {
 ; CHECK-LABEL: @fptosi_8f64_8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @src64, align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = fptosi <8 x double> [[TMP1]] to <8 x i16>
-; CHECK-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-  %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-  %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-  %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-  %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+  %a0 = load double, ptr @src64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
   %cvt0 = fptosi double %a0 to i16
   %cvt1 = fptosi double %a1 to i16
   %cvt2 = fptosi double %a2 to i16
@@ -182,32 +182,32 @@ define void @fptosi_8f64_8i16() #0 {
   %cvt5 = fptosi double %a5 to i16
   %cvt6 = fptosi double %a6 to i16
   %cvt7 = fptosi double %a7 to i16
-  store i16 %cvt0, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 0), align 2
-  store i16 %cvt1, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 1), align 2
-  store i16 %cvt2, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 2), align 2
-  store i16 %cvt3, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 3), align 2
-  store i16 %cvt4, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 4), align 2
-  store i16 %cvt5, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 5), align 2
-  store i16 %cvt6, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 6), align 2
-  store i16 %cvt7, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 7), align 2
+  store i16 %cvt0, ptr @dst16, align 2
+  store i16 %cvt1, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 1), align 2
+  store i16 %cvt2, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 2), align 2
+  store i16 %cvt3, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 3), align 2
+  store i16 %cvt4, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 4), align 2
+  store i16 %cvt5, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 5), align 2
+  store i16 %cvt6, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 6), align 2
+  store i16 %cvt7, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 7), align 2
   ret void
 }
 
 define void @fptosi_8f64_8i8() #0 {
 ; CHECK-LABEL: @fptosi_8f64_8i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @src64, align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = fptosi <8 x double> [[TMP1]] to <8 x i8>
-; CHECK-NEXT:    store <8 x i8> [[TMP2]], <8 x i8>* bitcast ([64 x i8]* @dst8 to <8 x i8>*), align 1
+; CHECK-NEXT:    store <8 x i8> [[TMP2]], ptr @dst8, align 1
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-  %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-  %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-  %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-  %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+  %a0 = load double, ptr @src64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
   %cvt0 = fptosi double %a0 to i8
   %cvt1 = fptosi double %a1 to i8
   %cvt2 = fptosi double %a2 to i8
@@ -216,14 +216,14 @@ define void @fptosi_8f64_8i8() #0 {
   %cvt5 = fptosi double %a5 to i8
   %cvt6 = fptosi double %a6 to i8
   %cvt7 = fptosi double %a7 to i8
-  store i8 %cvt0, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 0), align 1
-  store i8 %cvt1, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 1), align 1
-  store i8 %cvt2, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 2), align 1
-  store i8 %cvt3, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 3), align 1
-  store i8 %cvt4, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 4), align 1
-  store i8 %cvt5, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 5), align 1
-  store i8 %cvt6, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 6), align 1
-  store i8 %cvt7, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 7), align 1
+  store i8 %cvt0, ptr @dst8, align 1
+  store i8 %cvt1, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 1), align 1
+  store i8 %cvt2, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 2), align 1
+  store i8 %cvt3, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 3), align 1
+  store i8 %cvt4, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 4), align 1
+  store i8 %cvt5, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 5), align 1
+  store i8 %cvt6, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 6), align 1
+  store i8 %cvt7, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 7), align 1
   ret void
 }
 
@@ -233,14 +233,14 @@ define void @fptosi_8f64_8i8() #0 {
 
 define void @fptosi_8f32_8i64() #0 {
 ; SSE-LABEL: @fptosi_8f32_8i64(
-; SSE-NEXT:    [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-; SSE-NEXT:    [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-; SSE-NEXT:    [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-; SSE-NEXT:    [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-; SSE-NEXT:    [[A4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-; SSE-NEXT:    [[A5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-; SSE-NEXT:    [[A6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-; SSE-NEXT:    [[A7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+; SSE-NEXT:    [[A0:%.*]] = load float, ptr @src32, align 4
+; SSE-NEXT:    [[A1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+; SSE-NEXT:    [[A2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+; SSE-NEXT:    [[A3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+; SSE-NEXT:    [[A4:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[A5:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+; SSE-NEXT:    [[A6:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+; SSE-NEXT:    [[A7:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
 ; SSE-NEXT:    [[CVT0:%.*]] = fptosi float [[A0]] to i64
 ; SSE-NEXT:    [[CVT1:%.*]] = fptosi float [[A1]] to i64
 ; SSE-NEXT:    [[CVT2:%.*]] = fptosi float [[A2]] to i64
@@ -249,25 +249,25 @@ define void @fptosi_8f32_8i64() #0 {
 ; SSE-NEXT:    [[CVT5:%.*]] = fptosi float [[A5]] to i64
 ; SSE-NEXT:    [[CVT6:%.*]] = fptosi float [[A6]] to i64
 ; SSE-NEXT:    [[CVT7:%.*]] = fptosi float [[A7]] to i64
-; SSE-NEXT:    store i64 [[CVT0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
-; SSE-NEXT:    store i64 [[CVT1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
-; SSE-NEXT:    store i64 [[CVT2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
-; SSE-NEXT:    store i64 [[CVT3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
-; SSE-NEXT:    store i64 [[CVT4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
-; SSE-NEXT:    store i64 [[CVT5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
-; SSE-NEXT:    store i64 [[CVT6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
-; SSE-NEXT:    store i64 [[CVT7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
+; SSE-NEXT:    store i64 [[CVT0]], ptr @dst64, align 8
+; SSE-NEXT:    store i64 [[CVT1]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 1), align 8
+; SSE-NEXT:    store i64 [[CVT2]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 2), align 8
+; SSE-NEXT:    store i64 [[CVT3]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 3), align 8
+; SSE-NEXT:    store i64 [[CVT4]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
+; SSE-NEXT:    store i64 [[CVT5]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 5), align 8
+; SSE-NEXT:    store i64 [[CVT6]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 6), align 8
+; SSE-NEXT:    store i64 [[CVT7]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 7), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX256NODQ-LABEL: @fptosi_8f32_8i64(
-; AVX256NODQ-NEXT:    [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-; AVX256NODQ-NEXT:    [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-; AVX256NODQ-NEXT:    [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-; AVX256NODQ-NEXT:    [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-; AVX256NODQ-NEXT:    [[A4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-; AVX256NODQ-NEXT:    [[A5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-; AVX256NODQ-NEXT:    [[A6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-; AVX256NODQ-NEXT:    [[A7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+; AVX256NODQ-NEXT:    [[A0:%.*]] = load float, ptr @src32, align 4
+; AVX256NODQ-NEXT:    [[A1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+; AVX256NODQ-NEXT:    [[A2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+; AVX256NODQ-NEXT:    [[A3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+; AVX256NODQ-NEXT:    [[A4:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+; AVX256NODQ-NEXT:    [[A5:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+; AVX256NODQ-NEXT:    [[A6:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+; AVX256NODQ-NEXT:    [[A7:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
 ; AVX256NODQ-NEXT:    [[CVT0:%.*]] = fptosi float [[A0]] to i64
 ; AVX256NODQ-NEXT:    [[CVT1:%.*]] = fptosi float [[A1]] to i64
 ; AVX256NODQ-NEXT:    [[CVT2:%.*]] = fptosi float [[A2]] to i64
@@ -276,39 +276,39 @@ define void @fptosi_8f32_8i64() #0 {
 ; AVX256NODQ-NEXT:    [[CVT5:%.*]] = fptosi float [[A5]] to i64
 ; AVX256NODQ-NEXT:    [[CVT6:%.*]] = fptosi float [[A6]] to i64
 ; AVX256NODQ-NEXT:    [[CVT7:%.*]] = fptosi float [[A7]] to i64
-; AVX256NODQ-NEXT:    store i64 [[CVT0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT0]], ptr @dst64, align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT1]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 1), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT2]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 2), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT3]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 3), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT4]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT5]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 5), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT6]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 6), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT7]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 7), align 8
 ; AVX256NODQ-NEXT:    ret void
 ;
 ; AVX512-LABEL: @fptosi_8f32_8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX512-NEXT:    [[TMP2:%.*]] = fptosi <8 x float> [[TMP1]] to <8 x i64>
-; AVX512-NEXT:    store <8 x i64> [[TMP2]], <8 x i64>* bitcast ([8 x i64]* @dst64 to <8 x i64>*), align 8
+; AVX512-NEXT:    store <8 x i64> [[TMP2]], ptr @dst64, align 8
 ; AVX512-NEXT:    ret void
 ;
 ; AVX256DQ-LABEL: @fptosi_8f32_8i64(
-; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; AVX256DQ-NEXT:    [[TMP2:%.*]] = fptosi <4 x float> [[TMP1]] to <4 x i64>
-; AVX256DQ-NEXT:    store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([8 x i64]* @dst64 to <4 x i64>*), align 8
-; AVX256DQ-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; AVX256DQ-NEXT:    store <4 x i64> [[TMP2]], ptr @dst64, align 8
+; AVX256DQ-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
 ; AVX256DQ-NEXT:    [[TMP4:%.*]] = fptosi <4 x float> [[TMP3]] to <4 x i64>
-; AVX256DQ-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX256DQ-NEXT:    store <4 x i64> [[TMP4]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
 ; AVX256DQ-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-  %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-  %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-  %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-  %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+  %a0 = load float, ptr @src32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+  %a4 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+  %a5 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+  %a6 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+  %a7 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
   %cvt0 = fptosi float %a0 to i64
   %cvt1 = fptosi float %a1 to i64
   %cvt2 = fptosi float %a2 to i64
@@ -317,41 +317,41 @@ define void @fptosi_8f32_8i64() #0 {
   %cvt5 = fptosi float %a5 to i64
   %cvt6 = fptosi float %a6 to i64
   %cvt7 = fptosi float %a7 to i64
-  store i64 %cvt0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
-  store i64 %cvt1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
-  store i64 %cvt2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
-  store i64 %cvt3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
-  store i64 %cvt4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
-  store i64 %cvt5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
-  store i64 %cvt6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
-  store i64 %cvt7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
+  store i64 %cvt0, ptr @dst64, align 8
+  store i64 %cvt1, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 1), align 8
+  store i64 %cvt2, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 2), align 8
+  store i64 %cvt3, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 3), align 8
+  store i64 %cvt4, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
+  store i64 %cvt5, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 5), align 8
+  store i64 %cvt6, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 6), align 8
+  store i64 %cvt7, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @fptosi_8f32_8i32() #0 {
 ; SSE-LABEL: @fptosi_8f32_8i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; SSE-NEXT:    [[TMP2:%.*]] = fptosi <4 x float> [[TMP1]] to <4 x i32>
-; SSE-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([16 x i32]* @dst32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 4
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = fptosi <4 x float> [[TMP3]] to <4 x i32>
-; SSE-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 4), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @fptosi_8f32_8i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = fptosi <8 x float> [[TMP1]] to <8 x i32>
-; AVX-NEXT:    store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([16 x i32]* @dst32 to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP2]], ptr @dst32, align 4
 ; AVX-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-  %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-  %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-  %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-  %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+  %a0 = load float, ptr @src32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+  %a4 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+  %a5 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+  %a6 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+  %a7 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
   %cvt0 = fptosi float %a0 to i32
   %cvt1 = fptosi float %a1 to i32
   %cvt2 = fptosi float %a2 to i32
@@ -360,32 +360,32 @@ define void @fptosi_8f32_8i32() #0 {
   %cvt5 = fptosi float %a5 to i32
   %cvt6 = fptosi float %a6 to i32
   %cvt7 = fptosi float %a7 to i32
-  store i32 %cvt0, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 0), align 4
-  store i32 %cvt1, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 1), align 4
-  store i32 %cvt2, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 2), align 4
-  store i32 %cvt3, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 3), align 4
-  store i32 %cvt4, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4), align 4
-  store i32 %cvt5, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 5), align 4
-  store i32 %cvt6, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 6), align 4
-  store i32 %cvt7, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 7), align 4
+  store i32 %cvt0, ptr @dst32, align 4
+  store i32 %cvt1, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 1), align 4
+  store i32 %cvt2, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 2), align 4
+  store i32 %cvt3, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 3), align 4
+  store i32 %cvt4, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 4), align 4
+  store i32 %cvt5, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 5), align 4
+  store i32 %cvt6, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 6), align 4
+  store i32 %cvt7, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @fptosi_8f32_8i16() #0 {
 ; CHECK-LABEL: @fptosi_8f32_8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fptosi <8 x float> [[TMP1]] to <8 x i16>
-; CHECK-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-  %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-  %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-  %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-  %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+  %a0 = load float, ptr @src32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+  %a4 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+  %a5 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+  %a6 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+  %a7 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
   %cvt0 = fptosi float %a0 to i16
   %cvt1 = fptosi float %a1 to i16
   %cvt2 = fptosi float %a2 to i16
@@ -394,32 +394,32 @@ define void @fptosi_8f32_8i16() #0 {
   %cvt5 = fptosi float %a5 to i16
   %cvt6 = fptosi float %a6 to i16
   %cvt7 = fptosi float %a7 to i16
-  store i16 %cvt0, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 0), align 2
-  store i16 %cvt1, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 1), align 2
-  store i16 %cvt2, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 2), align 2
-  store i16 %cvt3, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 3), align 2
-  store i16 %cvt4, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 4), align 2
-  store i16 %cvt5, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 5), align 2
-  store i16 %cvt6, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 6), align 2
-  store i16 %cvt7, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 7), align 2
+  store i16 %cvt0, ptr @dst16, align 2
+  store i16 %cvt1, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 1), align 2
+  store i16 %cvt2, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 2), align 2
+  store i16 %cvt3, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 3), align 2
+  store i16 %cvt4, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 4), align 2
+  store i16 %cvt5, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 5), align 2
+  store i16 %cvt6, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 6), align 2
+  store i16 %cvt7, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 7), align 2
   ret void
 }
 
 define void @fptosi_8f32_8i8() #0 {
 ; CHECK-LABEL: @fptosi_8f32_8i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fptosi <8 x float> [[TMP1]] to <8 x i8>
-; CHECK-NEXT:    store <8 x i8> [[TMP2]], <8 x i8>* bitcast ([64 x i8]* @dst8 to <8 x i8>*), align 1
+; CHECK-NEXT:    store <8 x i8> [[TMP2]], ptr @dst8, align 1
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-  %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-  %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-  %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-  %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+  %a0 = load float, ptr @src32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+  %a4 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+  %a5 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+  %a6 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+  %a7 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
   %cvt0 = fptosi float %a0 to i8
   %cvt1 = fptosi float %a1 to i8
   %cvt2 = fptosi float %a2 to i8
@@ -428,14 +428,14 @@ define void @fptosi_8f32_8i8() #0 {
   %cvt5 = fptosi float %a5 to i8
   %cvt6 = fptosi float %a6 to i8
   %cvt7 = fptosi float %a7 to i8
-  store i8 %cvt0, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 0), align 1
-  store i8 %cvt1, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 1), align 1
-  store i8 %cvt2, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 2), align 1
-  store i8 %cvt3, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 3), align 1
-  store i8 %cvt4, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 4), align 1
-  store i8 %cvt5, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 5), align 1
-  store i8 %cvt6, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 6), align 1
-  store i8 %cvt7, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 7), align 1
+  store i8 %cvt0, ptr @dst8, align 1
+  store i8 %cvt1, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 1), align 1
+  store i8 %cvt2, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 2), align 1
+  store i8 %cvt3, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 3), align 1
+  store i8 %cvt4, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 4), align 1
+  store i8 %cvt5, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 5), align 1
+  store i8 %cvt6, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 6), align 1
+  store i8 %cvt7, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 7), align 1
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll b/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll
index 44677371e9084..003a0f792c6a3 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll
@@ -21,14 +21,14 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 define void @fptoui_8f64_8i64() #0 {
 ; SSE-LABEL: @fptoui_8f64_8i64(
-; SSE-NEXT:    [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-; SSE-NEXT:    [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-; SSE-NEXT:    [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-; SSE-NEXT:    [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-; SSE-NEXT:    [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-; SSE-NEXT:    [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-; SSE-NEXT:    [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-; SSE-NEXT:    [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+; SSE-NEXT:    [[A0:%.*]] = load double, ptr @src64, align 8
+; SSE-NEXT:    [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+; SSE-NEXT:    [[A4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[A5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+; SSE-NEXT:    [[A6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[A7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
 ; SSE-NEXT:    [[CVT0:%.*]] = fptoui double [[A0]] to i64
 ; SSE-NEXT:    [[CVT1:%.*]] = fptoui double [[A1]] to i64
 ; SSE-NEXT:    [[CVT2:%.*]] = fptoui double [[A2]] to i64
@@ -37,25 +37,25 @@ define void @fptoui_8f64_8i64() #0 {
 ; SSE-NEXT:    [[CVT5:%.*]] = fptoui double [[A5]] to i64
 ; SSE-NEXT:    [[CVT6:%.*]] = fptoui double [[A6]] to i64
 ; SSE-NEXT:    [[CVT7:%.*]] = fptoui double [[A7]] to i64
-; SSE-NEXT:    store i64 [[CVT0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
-; SSE-NEXT:    store i64 [[CVT1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
-; SSE-NEXT:    store i64 [[CVT2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
-; SSE-NEXT:    store i64 [[CVT3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
-; SSE-NEXT:    store i64 [[CVT4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
-; SSE-NEXT:    store i64 [[CVT5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
-; SSE-NEXT:    store i64 [[CVT6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
-; SSE-NEXT:    store i64 [[CVT7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
+; SSE-NEXT:    store i64 [[CVT0]], ptr @dst64, align 8
+; SSE-NEXT:    store i64 [[CVT1]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 1), align 8
+; SSE-NEXT:    store i64 [[CVT2]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 2), align 8
+; SSE-NEXT:    store i64 [[CVT3]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 3), align 8
+; SSE-NEXT:    store i64 [[CVT4]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
+; SSE-NEXT:    store i64 [[CVT5]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 5), align 8
+; SSE-NEXT:    store i64 [[CVT6]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 6), align 8
+; SSE-NEXT:    store i64 [[CVT7]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 7), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX256NODQ-LABEL: @fptoui_8f64_8i64(
-; AVX256NODQ-NEXT:    [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-; AVX256NODQ-NEXT:    [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-; AVX256NODQ-NEXT:    [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-; AVX256NODQ-NEXT:    [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-; AVX256NODQ-NEXT:    [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-; AVX256NODQ-NEXT:    [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-; AVX256NODQ-NEXT:    [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-; AVX256NODQ-NEXT:    [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+; AVX256NODQ-NEXT:    [[A0:%.*]] = load double, ptr @src64, align 8
+; AVX256NODQ-NEXT:    [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+; AVX256NODQ-NEXT:    [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+; AVX256NODQ-NEXT:    [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+; AVX256NODQ-NEXT:    [[A4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+; AVX256NODQ-NEXT:    [[A5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+; AVX256NODQ-NEXT:    [[A6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+; AVX256NODQ-NEXT:    [[A7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
 ; AVX256NODQ-NEXT:    [[CVT0:%.*]] = fptoui double [[A0]] to i64
 ; AVX256NODQ-NEXT:    [[CVT1:%.*]] = fptoui double [[A1]] to i64
 ; AVX256NODQ-NEXT:    [[CVT2:%.*]] = fptoui double [[A2]] to i64
@@ -64,39 +64,39 @@ define void @fptoui_8f64_8i64() #0 {
 ; AVX256NODQ-NEXT:    [[CVT5:%.*]] = fptoui double [[A5]] to i64
 ; AVX256NODQ-NEXT:    [[CVT6:%.*]] = fptoui double [[A6]] to i64
 ; AVX256NODQ-NEXT:    [[CVT7:%.*]] = fptoui double [[A7]] to i64
-; AVX256NODQ-NEXT:    store i64 [[CVT0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT0]], ptr @dst64, align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT1]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 1), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT2]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 2), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT3]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 3), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT4]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT5]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 5), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT6]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 6), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT7]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 7), align 8
 ; AVX256NODQ-NEXT:    ret void
 ;
 ; AVX512F-LABEL: @fptoui_8f64_8i64(
-; AVX512F-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
+; AVX512F-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @src64, align 8
 ; AVX512F-NEXT:    [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i64>
-; AVX512F-NEXT:    store <8 x i64> [[TMP2]], <8 x i64>* bitcast ([8 x i64]* @dst64 to <8 x i64>*), align 8
+; AVX512F-NEXT:    store <8 x i64> [[TMP2]], ptr @dst64, align 8
 ; AVX512F-NEXT:    ret void
 ;
 ; AVX256DQ-LABEL: @fptoui_8f64_8i64(
-; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
 ; AVX256DQ-NEXT:    [[TMP2:%.*]] = fptoui <4 x double> [[TMP1]] to <4 x i64>
-; AVX256DQ-NEXT:    store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([8 x i64]* @dst64 to <4 x i64>*), align 8
-; AVX256DQ-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX256DQ-NEXT:    store <4 x i64> [[TMP2]], ptr @dst64, align 8
+; AVX256DQ-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
 ; AVX256DQ-NEXT:    [[TMP4:%.*]] = fptoui <4 x double> [[TMP3]] to <4 x i64>
-; AVX256DQ-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX256DQ-NEXT:    store <4 x i64> [[TMP4]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
 ; AVX256DQ-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-  %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-  %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-  %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-  %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+  %a0 = load double, ptr @src64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
   %cvt0 = fptoui double %a0 to i64
   %cvt1 = fptoui double %a1 to i64
   %cvt2 = fptoui double %a2 to i64
@@ -105,41 +105,41 @@ define void @fptoui_8f64_8i64() #0 {
   %cvt5 = fptoui double %a5 to i64
   %cvt6 = fptoui double %a6 to i64
   %cvt7 = fptoui double %a7 to i64
-  store i64 %cvt0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
-  store i64 %cvt1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
-  store i64 %cvt2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
-  store i64 %cvt3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
-  store i64 %cvt4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
-  store i64 %cvt5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
-  store i64 %cvt6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
-  store i64 %cvt7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
+  store i64 %cvt0, ptr @dst64, align 8
+  store i64 %cvt1, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 1), align 8
+  store i64 %cvt2, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 2), align 8
+  store i64 %cvt3, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 3), align 8
+  store i64 %cvt4, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
+  store i64 %cvt5, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 5), align 8
+  store i64 %cvt6, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 6), align 8
+  store i64 %cvt7, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @fptoui_8f64_8i32() #0 {
 ; SSE-LABEL: @fptoui_8f64_8i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
 ; SSE-NEXT:    [[TMP2:%.*]] = fptoui <4 x double> [[TMP1]] to <4 x i32>
-; SSE-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([16 x i32]* @dst32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; SSE-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 4
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
 ; SSE-NEXT:    [[TMP4:%.*]] = fptoui <4 x double> [[TMP3]] to <4 x i32>
-; SSE-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 4), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @fptoui_8f64_8i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @src64, align 8
 ; AVX-NEXT:    [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i32>
-; AVX-NEXT:    store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([16 x i32]* @dst32 to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP2]], ptr @dst32, align 4
 ; AVX-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-  %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-  %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-  %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-  %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+  %a0 = load double, ptr @src64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
   %cvt0 = fptoui double %a0 to i32
   %cvt1 = fptoui double %a1 to i32
   %cvt2 = fptoui double %a2 to i32
@@ -148,32 +148,32 @@ define void @fptoui_8f64_8i32() #0 {
   %cvt5 = fptoui double %a5 to i32
   %cvt6 = fptoui double %a6 to i32
   %cvt7 = fptoui double %a7 to i32
-  store i32 %cvt0, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 0), align 4
-  store i32 %cvt1, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 1), align 4
-  store i32 %cvt2, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 2), align 4
-  store i32 %cvt3, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 3), align 4
-  store i32 %cvt4, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4), align 4
-  store i32 %cvt5, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 5), align 4
-  store i32 %cvt6, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 6), align 4
-  store i32 %cvt7, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 7), align 4
+  store i32 %cvt0, ptr @dst32, align 4
+  store i32 %cvt1, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 1), align 4
+  store i32 %cvt2, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 2), align 4
+  store i32 %cvt3, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 3), align 4
+  store i32 %cvt4, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 4), align 4
+  store i32 %cvt5, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 5), align 4
+  store i32 %cvt6, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 6), align 4
+  store i32 %cvt7, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @fptoui_8f64_8i16() #0 {
 ; CHECK-LABEL: @fptoui_8f64_8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @src64, align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i16>
-; CHECK-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-  %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-  %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-  %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-  %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+  %a0 = load double, ptr @src64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
   %cvt0 = fptoui double %a0 to i16
   %cvt1 = fptoui double %a1 to i16
   %cvt2 = fptoui double %a2 to i16
@@ -182,32 +182,32 @@ define void @fptoui_8f64_8i16() #0 {
   %cvt5 = fptoui double %a5 to i16
   %cvt6 = fptoui double %a6 to i16
   %cvt7 = fptoui double %a7 to i16
-  store i16 %cvt0, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 0), align 2
-  store i16 %cvt1, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 1), align 2
-  store i16 %cvt2, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 2), align 2
-  store i16 %cvt3, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 3), align 2
-  store i16 %cvt4, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 4), align 2
-  store i16 %cvt5, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 5), align 2
-  store i16 %cvt6, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 6), align 2
-  store i16 %cvt7, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 7), align 2
+  store i16 %cvt0, ptr @dst16, align 2
+  store i16 %cvt1, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 1), align 2
+  store i16 %cvt2, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 2), align 2
+  store i16 %cvt3, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 3), align 2
+  store i16 %cvt4, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 4), align 2
+  store i16 %cvt5, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 5), align 2
+  store i16 %cvt6, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 6), align 2
+  store i16 %cvt7, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 7), align 2
   ret void
 }
 
 define void @fptoui_8f64_8i8() #0 {
 ; CHECK-LABEL: @fptoui_8f64_8i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @src64, align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i8>
-; CHECK-NEXT:    store <8 x i8> [[TMP2]], <8 x i8>* bitcast ([64 x i8]* @dst8 to <8 x i8>*), align 1
+; CHECK-NEXT:    store <8 x i8> [[TMP2]], ptr @dst8, align 1
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-  %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-  %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-  %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-  %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+  %a0 = load double, ptr @src64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
   %cvt0 = fptoui double %a0 to i8
   %cvt1 = fptoui double %a1 to i8
   %cvt2 = fptoui double %a2 to i8
@@ -216,14 +216,14 @@ define void @fptoui_8f64_8i8() #0 {
   %cvt5 = fptoui double %a5 to i8
   %cvt6 = fptoui double %a6 to i8
   %cvt7 = fptoui double %a7 to i8
-  store i8 %cvt0, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 0), align 1
-  store i8 %cvt1, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 1), align 1
-  store i8 %cvt2, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 2), align 1
-  store i8 %cvt3, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 3), align 1
-  store i8 %cvt4, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 4), align 1
-  store i8 %cvt5, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 5), align 1
-  store i8 %cvt6, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 6), align 1
-  store i8 %cvt7, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 7), align 1
+  store i8 %cvt0, ptr @dst8, align 1
+  store i8 %cvt1, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 1), align 1
+  store i8 %cvt2, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 2), align 1
+  store i8 %cvt3, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 3), align 1
+  store i8 %cvt4, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 4), align 1
+  store i8 %cvt5, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 5), align 1
+  store i8 %cvt6, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 6), align 1
+  store i8 %cvt7, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 7), align 1
   ret void
 }
 
@@ -233,14 +233,14 @@ define void @fptoui_8f64_8i8() #0 {
 
 define void @fptoui_8f32_8i64() #0 {
 ; SSE-LABEL: @fptoui_8f32_8i64(
-; SSE-NEXT:    [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-; SSE-NEXT:    [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-; SSE-NEXT:    [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-; SSE-NEXT:    [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-; SSE-NEXT:    [[A4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-; SSE-NEXT:    [[A5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-; SSE-NEXT:    [[A6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-; SSE-NEXT:    [[A7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+; SSE-NEXT:    [[A0:%.*]] = load float, ptr @src32, align 4
+; SSE-NEXT:    [[A1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+; SSE-NEXT:    [[A2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+; SSE-NEXT:    [[A3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+; SSE-NEXT:    [[A4:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[A5:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+; SSE-NEXT:    [[A6:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+; SSE-NEXT:    [[A7:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
 ; SSE-NEXT:    [[CVT0:%.*]] = fptoui float [[A0]] to i64
 ; SSE-NEXT:    [[CVT1:%.*]] = fptoui float [[A1]] to i64
 ; SSE-NEXT:    [[CVT2:%.*]] = fptoui float [[A2]] to i64
@@ -249,25 +249,25 @@ define void @fptoui_8f32_8i64() #0 {
 ; SSE-NEXT:    [[CVT5:%.*]] = fptoui float [[A5]] to i64
 ; SSE-NEXT:    [[CVT6:%.*]] = fptoui float [[A6]] to i64
 ; SSE-NEXT:    [[CVT7:%.*]] = fptoui float [[A7]] to i64
-; SSE-NEXT:    store i64 [[CVT0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
-; SSE-NEXT:    store i64 [[CVT1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
-; SSE-NEXT:    store i64 [[CVT2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
-; SSE-NEXT:    store i64 [[CVT3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
-; SSE-NEXT:    store i64 [[CVT4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
-; SSE-NEXT:    store i64 [[CVT5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
-; SSE-NEXT:    store i64 [[CVT6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
-; SSE-NEXT:    store i64 [[CVT7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
+; SSE-NEXT:    store i64 [[CVT0]], ptr @dst64, align 8
+; SSE-NEXT:    store i64 [[CVT1]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 1), align 8
+; SSE-NEXT:    store i64 [[CVT2]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 2), align 8
+; SSE-NEXT:    store i64 [[CVT3]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 3), align 8
+; SSE-NEXT:    store i64 [[CVT4]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
+; SSE-NEXT:    store i64 [[CVT5]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 5), align 8
+; SSE-NEXT:    store i64 [[CVT6]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 6), align 8
+; SSE-NEXT:    store i64 [[CVT7]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 7), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX256NODQ-LABEL: @fptoui_8f32_8i64(
-; AVX256NODQ-NEXT:    [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-; AVX256NODQ-NEXT:    [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-; AVX256NODQ-NEXT:    [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-; AVX256NODQ-NEXT:    [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-; AVX256NODQ-NEXT:    [[A4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-; AVX256NODQ-NEXT:    [[A5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-; AVX256NODQ-NEXT:    [[A6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-; AVX256NODQ-NEXT:    [[A7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+; AVX256NODQ-NEXT:    [[A0:%.*]] = load float, ptr @src32, align 4
+; AVX256NODQ-NEXT:    [[A1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+; AVX256NODQ-NEXT:    [[A2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+; AVX256NODQ-NEXT:    [[A3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+; AVX256NODQ-NEXT:    [[A4:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+; AVX256NODQ-NEXT:    [[A5:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+; AVX256NODQ-NEXT:    [[A6:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+; AVX256NODQ-NEXT:    [[A7:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
 ; AVX256NODQ-NEXT:    [[CVT0:%.*]] = fptoui float [[A0]] to i64
 ; AVX256NODQ-NEXT:    [[CVT1:%.*]] = fptoui float [[A1]] to i64
 ; AVX256NODQ-NEXT:    [[CVT2:%.*]] = fptoui float [[A2]] to i64
@@ -276,39 +276,39 @@ define void @fptoui_8f32_8i64() #0 {
 ; AVX256NODQ-NEXT:    [[CVT5:%.*]] = fptoui float [[A5]] to i64
 ; AVX256NODQ-NEXT:    [[CVT6:%.*]] = fptoui float [[A6]] to i64
 ; AVX256NODQ-NEXT:    [[CVT7:%.*]] = fptoui float [[A7]] to i64
-; AVX256NODQ-NEXT:    store i64 [[CVT0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
-; AVX256NODQ-NEXT:    store i64 [[CVT7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT0]], ptr @dst64, align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT1]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 1), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT2]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 2), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT3]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 3), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT4]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT5]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 5), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT6]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 6), align 8
+; AVX256NODQ-NEXT:    store i64 [[CVT7]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 7), align 8
 ; AVX256NODQ-NEXT:    ret void
 ;
 ; AVX512F-LABEL: @fptoui_8f32_8i64(
-; AVX512F-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX512F-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX512F-NEXT:    [[TMP2:%.*]] = fptoui <8 x float> [[TMP1]] to <8 x i64>
-; AVX512F-NEXT:    store <8 x i64> [[TMP2]], <8 x i64>* bitcast ([8 x i64]* @dst64 to <8 x i64>*), align 8
+; AVX512F-NEXT:    store <8 x i64> [[TMP2]], ptr @dst64, align 8
 ; AVX512F-NEXT:    ret void
 ;
 ; AVX256DQ-LABEL: @fptoui_8f32_8i64(
-; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; AVX256DQ-NEXT:    [[TMP2:%.*]] = fptoui <4 x float> [[TMP1]] to <4 x i64>
-; AVX256DQ-NEXT:    store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([8 x i64]* @dst64 to <4 x i64>*), align 8
-; AVX256DQ-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; AVX256DQ-NEXT:    store <4 x i64> [[TMP2]], ptr @dst64, align 8
+; AVX256DQ-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
 ; AVX256DQ-NEXT:    [[TMP4:%.*]] = fptoui <4 x float> [[TMP3]] to <4 x i64>
-; AVX256DQ-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX256DQ-NEXT:    store <4 x i64> [[TMP4]], ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
 ; AVX256DQ-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-  %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-  %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-  %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-  %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+  %a0 = load float, ptr @src32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+  %a4 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+  %a5 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+  %a6 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+  %a7 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
   %cvt0 = fptoui float %a0 to i64
   %cvt1 = fptoui float %a1 to i64
   %cvt2 = fptoui float %a2 to i64
@@ -317,41 +317,41 @@ define void @fptoui_8f32_8i64() #0 {
   %cvt5 = fptoui float %a5 to i64
   %cvt6 = fptoui float %a6 to i64
   %cvt7 = fptoui float %a7 to i64
-  store i64 %cvt0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 0), align 8
-  store i64 %cvt1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 1), align 8
-  store i64 %cvt2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 2), align 8
-  store i64 %cvt3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 3), align 8
-  store i64 %cvt4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 4), align 8
-  store i64 %cvt5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 5), align 8
-  store i64 %cvt6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 6), align 8
-  store i64 %cvt7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @dst64, i32 0, i64 7), align 8
+  store i64 %cvt0, ptr @dst64, align 8
+  store i64 %cvt1, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 1), align 8
+  store i64 %cvt2, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 2), align 8
+  store i64 %cvt3, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 3), align 8
+  store i64 %cvt4, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 4), align 8
+  store i64 %cvt5, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 5), align 8
+  store i64 %cvt6, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 6), align 8
+  store i64 %cvt7, ptr getelementptr inbounds ([8 x i64], ptr @dst64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @fptoui_8f32_8i32() #0 {
 ; SSE-LABEL: @fptoui_8f32_8i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; SSE-NEXT:    [[TMP2:%.*]] = fptoui <4 x float> [[TMP1]] to <4 x i32>
-; SSE-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([16 x i32]* @dst32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 4
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = fptoui <4 x float> [[TMP3]] to <4 x i32>
-; SSE-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 4), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @fptoui_8f32_8i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = fptoui <8 x float> [[TMP1]] to <8 x i32>
-; AVX-NEXT:    store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([16 x i32]* @dst32 to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP2]], ptr @dst32, align 4
 ; AVX-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-  %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-  %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-  %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-  %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+  %a0 = load float, ptr @src32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+  %a4 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+  %a5 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+  %a6 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+  %a7 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
   %cvt0 = fptoui float %a0 to i32
   %cvt1 = fptoui float %a1 to i32
   %cvt2 = fptoui float %a2 to i32
@@ -360,32 +360,32 @@ define void @fptoui_8f32_8i32() #0 {
   %cvt5 = fptoui float %a5 to i32
   %cvt6 = fptoui float %a6 to i32
   %cvt7 = fptoui float %a7 to i32
-  store i32 %cvt0, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 0), align 4
-  store i32 %cvt1, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 1), align 4
-  store i32 %cvt2, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 2), align 4
-  store i32 %cvt3, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 3), align 4
-  store i32 %cvt4, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4), align 4
-  store i32 %cvt5, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 5), align 4
-  store i32 %cvt6, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 6), align 4
-  store i32 %cvt7, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 7), align 4
+  store i32 %cvt0, ptr @dst32, align 4
+  store i32 %cvt1, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 1), align 4
+  store i32 %cvt2, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 2), align 4
+  store i32 %cvt3, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 3), align 4
+  store i32 %cvt4, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 4), align 4
+  store i32 %cvt5, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 5), align 4
+  store i32 %cvt6, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 6), align 4
+  store i32 %cvt7, ptr getelementptr inbounds ([16 x i32], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @fptoui_8f32_8i16() #0 {
 ; CHECK-LABEL: @fptoui_8f32_8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fptoui <8 x float> [[TMP1]] to <8 x i16>
-; CHECK-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-  %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-  %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-  %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-  %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+  %a0 = load float, ptr @src32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+  %a4 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+  %a5 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+  %a6 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+  %a7 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
   %cvt0 = fptoui float %a0 to i16
   %cvt1 = fptoui float %a1 to i16
   %cvt2 = fptoui float %a2 to i16
@@ -394,32 +394,32 @@ define void @fptoui_8f32_8i16() #0 {
   %cvt5 = fptoui float %a5 to i16
   %cvt6 = fptoui float %a6 to i16
   %cvt7 = fptoui float %a7 to i16
-  store i16 %cvt0, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 0), align 2
-  store i16 %cvt1, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 1), align 2
-  store i16 %cvt2, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 2), align 2
-  store i16 %cvt3, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 3), align 2
-  store i16 %cvt4, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 4), align 2
-  store i16 %cvt5, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 5), align 2
-  store i16 %cvt6, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 6), align 2
-  store i16 %cvt7, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 7), align 2
+  store i16 %cvt0, ptr @dst16, align 2
+  store i16 %cvt1, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 1), align 2
+  store i16 %cvt2, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 2), align 2
+  store i16 %cvt3, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 3), align 2
+  store i16 %cvt4, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 4), align 2
+  store i16 %cvt5, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 5), align 2
+  store i16 %cvt6, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 6), align 2
+  store i16 %cvt7, ptr getelementptr inbounds ([32 x i16], ptr @dst16, i32 0, i64 7), align 2
   ret void
 }
 
 define void @fptoui_8f32_8i8() #0 {
 ; CHECK-LABEL: @fptoui_8f32_8i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fptoui <8 x float> [[TMP1]] to <8 x i8>
-; CHECK-NEXT:    store <8 x i8> [[TMP2]], <8 x i8>* bitcast ([64 x i8]* @dst8 to <8 x i8>*), align 1
+; CHECK-NEXT:    store <8 x i8> [[TMP2]], ptr @dst8, align 1
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-  %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-  %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-  %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-  %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+  %a0 = load float, ptr @src32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+  %a4 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+  %a5 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+  %a6 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+  %a7 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
   %cvt0 = fptoui float %a0 to i8
   %cvt1 = fptoui float %a1 to i8
   %cvt2 = fptoui float %a2 to i8
@@ -428,14 +428,14 @@ define void @fptoui_8f32_8i8() #0 {
   %cvt5 = fptoui float %a5 to i8
   %cvt6 = fptoui float %a6 to i8
   %cvt7 = fptoui float %a7 to i8
-  store i8 %cvt0, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 0), align 1
-  store i8 %cvt1, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 1), align 1
-  store i8 %cvt2, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 2), align 1
-  store i8 %cvt3, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 3), align 1
-  store i8 %cvt4, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 4), align 1
-  store i8 %cvt5, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 5), align 1
-  store i8 %cvt6, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 6), align 1
-  store i8 %cvt7, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @dst8, i32 0, i64 7), align 1
+  store i8 %cvt0, ptr @dst8, align 1
+  store i8 %cvt1, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 1), align 1
+  store i8 %cvt2, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 2), align 1
+  store i8 %cvt3, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 3), align 1
+  store i8 %cvt4, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 4), align 1
+  store i8 %cvt5, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 5), align 1
+  store i8 %cvt6, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 6), align 1
+  store i8 %cvt7, ptr getelementptr inbounds ([64 x i8], ptr @dst8, i32 0, i64 7), align 1
   ret void
 }
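
(The hunks above and below all apply the same mechanical rewrite: typed pointee types on loads, stores, and getelementptr constant expressions become the opaque `ptr` type, which lets the `bitcast` constant expressions on the vector accesses drop out entirely. A minimal sketch of the pattern, assuming a hypothetical global `@g = global [4 x float] zeroinitializer` that is not part of this patch; the two forms belong to separate modules, since typed and opaque pointers cannot be mixed in one:

  ; typed pointers: a vector access needs a pointer bitcast
  %v = load <4 x float>, <4 x float>* bitcast ([4 x float]* @g to <4 x float>*), align 16

  ; opaque pointers: the global is used directly
  %v = load <4 x float>, ptr @g, align 16

Scalar element accesses keep their getelementptr constant expressions, since the source element type lives on the gep itself, while the index-0 case simplifies to the bare global, as in the `%a0` load and `%cvt0` store lines throughout this patch.)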
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/fround.ll b/llvm/test/Transforms/SLPVectorizer/X86/fround.ll
index 859346c3cfcf3..3da641023e556 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/fround.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/fround.ll
@@ -27,91 +27,91 @@ declare float @llvm.trunc.f32(float %p)
 
 define void @ceil_2f64() #0 {
 ; SSE2-LABEL: @ceil_2f64(
-; SSE2-NEXT:    [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-; SSE2-NEXT:    [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+; SSE2-NEXT:    [[LD0:%.*]] = load double, ptr @src64, align 8
+; SSE2-NEXT:    [[LD1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
 ; SSE2-NEXT:    [[CEIL0:%.*]] = call double @llvm.ceil.f64(double [[LD0]])
 ; SSE2-NEXT:    [[CEIL1:%.*]] = call double @llvm.ceil.f64(double [[LD1]])
-; SSE2-NEXT:    store double [[CEIL0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-; SSE2-NEXT:    store double [[CEIL1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT:    store double [[CEIL0]], ptr @dst64, align 8
+; SSE2-NEXT:    store double [[CEIL1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @ceil_2f64(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 8
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP1]])
-; SSE41-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 8
 ; SSE41-NEXT:    ret void
 ;
 ; AVX-LABEL: @ceil_2f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 8
 ; AVX-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP1]])
-; AVX-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; AVX-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 8
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+  %ld0 = load double, ptr @src64, align 8
+  %ld1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
   %ceil0 = call double @llvm.ceil.f64(double %ld0)
   %ceil1 = call double @llvm.ceil.f64(double %ld1)
-  store double %ceil0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %ceil1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %ceil0, ptr @dst64, align 8
+  store double %ceil1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @ceil_4f64() #0 {
 ; SSE2-LABEL: @ceil_4f64(
-; SSE2-NEXT:    [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-; SSE2-NEXT:    [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-; SSE2-NEXT:    [[LD2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-; SSE2-NEXT:    [[LD3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+; SSE2-NEXT:    [[LD0:%.*]] = load double, ptr @src64, align 8
+; SSE2-NEXT:    [[LD1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+; SSE2-NEXT:    [[LD2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+; SSE2-NEXT:    [[LD3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
 ; SSE2-NEXT:    [[CEIL0:%.*]] = call double @llvm.ceil.f64(double [[LD0]])
 ; SSE2-NEXT:    [[CEIL1:%.*]] = call double @llvm.ceil.f64(double [[LD1]])
 ; SSE2-NEXT:    [[CEIL2:%.*]] = call double @llvm.ceil.f64(double [[LD2]])
 ; SSE2-NEXT:    [[CEIL3:%.*]] = call double @llvm.ceil.f64(double [[LD3]])
-; SSE2-NEXT:    store double [[CEIL0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-; SSE2-NEXT:    store double [[CEIL1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; SSE2-NEXT:    store double [[CEIL2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-; SSE2-NEXT:    store double [[CEIL3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; SSE2-NEXT:    store double [[CEIL0]], ptr @dst64, align 8
+; SSE2-NEXT:    store double [[CEIL1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT:    store double [[CEIL2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+; SSE2-NEXT:    store double [[CEIL3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @ceil_4f64(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 8
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP1]])
-; SSE41-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
-; SSE41-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 8
+; SSE41-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
 ; SSE41-NEXT:    [[TMP4:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP3]])
-; SSE41-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
 ; SSE41-NEXT:    ret void
 ;
 ; AVX-LABEL: @ceil_4f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
 ; AVX-NEXT:    [[TMP2:%.*]] = call <4 x double> @llvm.ceil.v4f64(<4 x double> [[TMP1]])
-; AVX-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 8
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-  %ld2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-  %ld3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+  %ld0 = load double, ptr @src64, align 8
+  %ld1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+  %ld2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+  %ld3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
   %ceil0 = call double @llvm.ceil.f64(double %ld0)
   %ceil1 = call double @llvm.ceil.f64(double %ld1)
   %ceil2 = call double @llvm.ceil.f64(double %ld2)
   %ceil3 = call double @llvm.ceil.f64(double %ld3)
-  store double %ceil0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %ceil1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %ceil2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-  store double %ceil3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %ceil0, ptr @dst64, align 8
+  store double %ceil1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %ceil2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+  store double %ceil3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
   ret void
 }
 
 define void @ceil_8f64() #0 {
 ; SSE2-LABEL: @ceil_8f64(
-; SSE2-NEXT:    [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-; SSE2-NEXT:    [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-; SSE2-NEXT:    [[LD2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-; SSE2-NEXT:    [[LD3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-; SSE2-NEXT:    [[LD4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-; SSE2-NEXT:    [[LD5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-; SSE2-NEXT:    [[LD6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-; SSE2-NEXT:    [[LD7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+; SSE2-NEXT:    [[LD0:%.*]] = load double, ptr @src64, align 8
+; SSE2-NEXT:    [[LD1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+; SSE2-NEXT:    [[LD2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+; SSE2-NEXT:    [[LD3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+; SSE2-NEXT:    [[LD4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+; SSE2-NEXT:    [[LD5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+; SSE2-NEXT:    [[LD6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+; SSE2-NEXT:    [[LD7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
 ; SSE2-NEXT:    [[CEIL0:%.*]] = call double @llvm.ceil.f64(double [[LD0]])
 ; SSE2-NEXT:    [[CEIL1:%.*]] = call double @llvm.ceil.f64(double [[LD1]])
 ; SSE2-NEXT:    [[CEIL2:%.*]] = call double @llvm.ceil.f64(double [[LD2]])
@@ -120,63 +120,63 @@ define void @ceil_8f64() #0 {
 ; SSE2-NEXT:    [[CEIL5:%.*]] = call double @llvm.ceil.f64(double [[LD5]])
 ; SSE2-NEXT:    [[CEIL6:%.*]] = call double @llvm.ceil.f64(double [[LD6]])
 ; SSE2-NEXT:    [[CEIL7:%.*]] = call double @llvm.ceil.f64(double [[LD7]])
-; SSE2-NEXT:    store double [[CEIL0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-; SSE2-NEXT:    store double [[CEIL1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; SSE2-NEXT:    store double [[CEIL2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-; SSE2-NEXT:    store double [[CEIL3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-; SSE2-NEXT:    store double [[CEIL4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 8
-; SSE2-NEXT:    store double [[CEIL5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-; SSE2-NEXT:    store double [[CEIL6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 8
-; SSE2-NEXT:    store double [[CEIL7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+; SSE2-NEXT:    store double [[CEIL0]], ptr @dst64, align 8
+; SSE2-NEXT:    store double [[CEIL1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT:    store double [[CEIL2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+; SSE2-NEXT:    store double [[CEIL3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+; SSE2-NEXT:    store double [[CEIL4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
+; SSE2-NEXT:    store double [[CEIL5]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+; SSE2-NEXT:    store double [[CEIL6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 8
+; SSE2-NEXT:    store double [[CEIL7]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @ceil_8f64(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 8
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP1]])
-; SSE41-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
-; SSE41-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 8
+; SSE41-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
 ; SSE41-NEXT:    [[TMP4:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP3]])
-; SSE41-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
-; SSE41-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+; SSE41-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
 ; SSE41-NEXT:    [[TMP6:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP5]])
-; SSE41-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 8
-; SSE41-NEXT:    [[TMP7:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
+; SSE41-NEXT:    [[TMP7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
 ; SSE41-NEXT:    [[TMP8:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP7]])
-; SSE41-NEXT:    store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 8
 ; SSE41-NEXT:    ret void
 ;
 ; AVX1-LABEL: @ceil_8f64(
-; AVX1-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX1-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
 ; AVX1-NEXT:    [[TMP2:%.*]] = call <4 x double> @llvm.ceil.v4f64(<4 x double> [[TMP1]])
-; AVX1-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
-; AVX1-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX1-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 8
+; AVX1-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
 ; AVX1-NEXT:    [[TMP4:%.*]] = call <4 x double> @llvm.ceil.v4f64(<4 x double> [[TMP3]])
-; AVX1-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX1-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @ceil_8f64(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
 ; AVX2-NEXT:    [[TMP2:%.*]] = call <4 x double> @llvm.ceil.v4f64(<4 x double> [[TMP1]])
-; AVX2-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
-; AVX2-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX2-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 8
+; AVX2-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
 ; AVX2-NEXT:    [[TMP4:%.*]] = call <4 x double> @llvm.ceil.v4f64(<4 x double> [[TMP3]])
-; AVX2-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX2-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @ceil_8f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @src64, align 8
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <8 x double> @llvm.ceil.v8f64(<8 x double> [[TMP1]])
-; AVX512-NEXT:    store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 8
+; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 8
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-  %ld2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-  %ld3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-  %ld4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-  %ld5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-  %ld6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-  %ld7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+  %ld0 = load double, ptr @src64, align 8
+  %ld1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+  %ld2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+  %ld3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+  %ld4 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+  %ld5 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+  %ld6 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+  %ld7 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
   %ceil0 = call double @llvm.ceil.f64(double %ld0)
   %ceil1 = call double @llvm.ceil.f64(double %ld1)
   %ceil2 = call double @llvm.ceil.f64(double %ld2)
@@ -185,104 +185,104 @@ define void @ceil_8f64() #0 {
   %ceil5 = call double @llvm.ceil.f64(double %ld5)
   %ceil6 = call double @llvm.ceil.f64(double %ld6)
   %ceil7 = call double @llvm.ceil.f64(double %ld7)
-  store double %ceil0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %ceil1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %ceil2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-  store double %ceil3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-  store double %ceil4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 8
-  store double %ceil5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-  store double %ceil6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 8
-  store double %ceil7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+  store double %ceil0, ptr @dst64, align 8
+  store double %ceil1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %ceil2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+  store double %ceil3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+  store double %ceil4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
+  store double %ceil5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+  store double %ceil6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 8
+  store double %ceil7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @floor_2f64() #0 {
 ; SSE2-LABEL: @floor_2f64(
-; SSE2-NEXT:    [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-; SSE2-NEXT:    [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+; SSE2-NEXT:    [[LD0:%.*]] = load double, ptr @src64, align 8
+; SSE2-NEXT:    [[LD1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
 ; SSE2-NEXT:    [[FLOOR0:%.*]] = call double @llvm.floor.f64(double [[LD0]])
 ; SSE2-NEXT:    [[FLOOR1:%.*]] = call double @llvm.floor.f64(double [[LD1]])
-; SSE2-NEXT:    store double [[FLOOR0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-; SSE2-NEXT:    store double [[FLOOR1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT:    store double [[FLOOR0]], ptr @dst64, align 8
+; SSE2-NEXT:    store double [[FLOOR1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @floor_2f64(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 8
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP1]])
-; SSE41-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 8
 ; SSE41-NEXT:    ret void
 ;
 ; AVX-LABEL: @floor_2f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 8
 ; AVX-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP1]])
-; AVX-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; AVX-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 8
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+  %ld0 = load double, ptr @src64, align 8
+  %ld1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
   %floor0 = call double @llvm.floor.f64(double %ld0)
   %floor1 = call double @llvm.floor.f64(double %ld1)
-  store double %floor0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %floor1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %floor0, ptr @dst64, align 8
+  store double %floor1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @floor_4f64() #0 {
 ; SSE2-LABEL: @floor_4f64(
-; SSE2-NEXT:    [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-; SSE2-NEXT:    [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-; SSE2-NEXT:    [[LD2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-; SSE2-NEXT:    [[LD3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+; SSE2-NEXT:    [[LD0:%.*]] = load double, ptr @src64, align 8
+; SSE2-NEXT:    [[LD1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+; SSE2-NEXT:    [[LD2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+; SSE2-NEXT:    [[LD3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
 ; SSE2-NEXT:    [[FLOOR0:%.*]] = call double @llvm.floor.f64(double [[LD0]])
 ; SSE2-NEXT:    [[FLOOR1:%.*]] = call double @llvm.floor.f64(double [[LD1]])
 ; SSE2-NEXT:    [[FLOOR2:%.*]] = call double @llvm.floor.f64(double [[LD2]])
 ; SSE2-NEXT:    [[FLOOR3:%.*]] = call double @llvm.floor.f64(double [[LD3]])
-; SSE2-NEXT:    store double [[FLOOR0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-; SSE2-NEXT:    store double [[FLOOR1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; SSE2-NEXT:    store double [[FLOOR2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-; SSE2-NEXT:    store double [[FLOOR3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; SSE2-NEXT:    store double [[FLOOR0]], ptr @dst64, align 8
+; SSE2-NEXT:    store double [[FLOOR1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT:    store double [[FLOOR2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+; SSE2-NEXT:    store double [[FLOOR3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @floor_4f64(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 8
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP1]])
-; SSE41-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
-; SSE41-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 8
+; SSE41-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
 ; SSE41-NEXT:    [[TMP4:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP3]])
-; SSE41-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
 ; SSE41-NEXT:    ret void
 ;
 ; AVX-LABEL: @floor_4f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
 ; AVX-NEXT:    [[TMP2:%.*]] = call <4 x double> @llvm.floor.v4f64(<4 x double> [[TMP1]])
-; AVX-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 8
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-  %ld2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-  %ld3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+  %ld0 = load double, ptr @src64, align 8
+  %ld1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+  %ld2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+  %ld3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
   %floor0 = call double @llvm.floor.f64(double %ld0)
   %floor1 = call double @llvm.floor.f64(double %ld1)
   %floor2 = call double @llvm.floor.f64(double %ld2)
   %floor3 = call double @llvm.floor.f64(double %ld3)
-  store double %floor0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %floor1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %floor2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-  store double %floor3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %floor0, ptr @dst64, align 8
+  store double %floor1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %floor2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+  store double %floor3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
   ret void
 }
 
 define void @floor_8f64() #0 {
 ; SSE2-LABEL: @floor_8f64(
-; SSE2-NEXT:    [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-; SSE2-NEXT:    [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-; SSE2-NEXT:    [[LD2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-; SSE2-NEXT:    [[LD3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-; SSE2-NEXT:    [[LD4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-; SSE2-NEXT:    [[LD5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-; SSE2-NEXT:    [[LD6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-; SSE2-NEXT:    [[LD7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+; SSE2-NEXT:    [[LD0:%.*]] = load double, ptr @src64, align 8
+; SSE2-NEXT:    [[LD1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+; SSE2-NEXT:    [[LD2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+; SSE2-NEXT:    [[LD3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+; SSE2-NEXT:    [[LD4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+; SSE2-NEXT:    [[LD5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+; SSE2-NEXT:    [[LD6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+; SSE2-NEXT:    [[LD7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
 ; SSE2-NEXT:    [[FLOOR0:%.*]] = call double @llvm.floor.f64(double [[LD0]])
 ; SSE2-NEXT:    [[FLOOR1:%.*]] = call double @llvm.floor.f64(double [[LD1]])
 ; SSE2-NEXT:    [[FLOOR2:%.*]] = call double @llvm.floor.f64(double [[LD2]])
@@ -291,63 +291,63 @@ define void @floor_8f64() #0 {
 ; SSE2-NEXT:    [[FLOOR5:%.*]] = call double @llvm.floor.f64(double [[LD5]])
 ; SSE2-NEXT:    [[FLOOR6:%.*]] = call double @llvm.floor.f64(double [[LD6]])
 ; SSE2-NEXT:    [[FLOOR7:%.*]] = call double @llvm.floor.f64(double [[LD7]])
-; SSE2-NEXT:    store double [[FLOOR0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-; SSE2-NEXT:    store double [[FLOOR1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; SSE2-NEXT:    store double [[FLOOR2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-; SSE2-NEXT:    store double [[FLOOR3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-; SSE2-NEXT:    store double [[FLOOR4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 8
-; SSE2-NEXT:    store double [[FLOOR5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-; SSE2-NEXT:    store double [[FLOOR6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 8
-; SSE2-NEXT:    store double [[FLOOR7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+; SSE2-NEXT:    store double [[FLOOR0]], ptr @dst64, align 8
+; SSE2-NEXT:    store double [[FLOOR1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT:    store double [[FLOOR2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+; SSE2-NEXT:    store double [[FLOOR3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+; SSE2-NEXT:    store double [[FLOOR4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
+; SSE2-NEXT:    store double [[FLOOR5]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+; SSE2-NEXT:    store double [[FLOOR6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 8
+; SSE2-NEXT:    store double [[FLOOR7]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @floor_8f64(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 8
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP1]])
-; SSE41-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
-; SSE41-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 8
+; SSE41-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
 ; SSE41-NEXT:    [[TMP4:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP3]])
-; SSE41-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
-; SSE41-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+; SSE41-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
 ; SSE41-NEXT:    [[TMP6:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP5]])
-; SSE41-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 8
-; SSE41-NEXT:    [[TMP7:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
+; SSE41-NEXT:    [[TMP7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
 ; SSE41-NEXT:    [[TMP8:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP7]])
-; SSE41-NEXT:    store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 8
 ; SSE41-NEXT:    ret void
 ;
 ; AVX1-LABEL: @floor_8f64(
-; AVX1-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX1-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
 ; AVX1-NEXT:    [[TMP2:%.*]] = call <4 x double> @llvm.floor.v4f64(<4 x double> [[TMP1]])
-; AVX1-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
-; AVX1-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX1-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 8
+; AVX1-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
 ; AVX1-NEXT:    [[TMP4:%.*]] = call <4 x double> @llvm.floor.v4f64(<4 x double> [[TMP3]])
-; AVX1-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX1-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @floor_8f64(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
 ; AVX2-NEXT:    [[TMP2:%.*]] = call <4 x double> @llvm.floor.v4f64(<4 x double> [[TMP1]])
-; AVX2-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
-; AVX2-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX2-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 8
+; AVX2-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
 ; AVX2-NEXT:    [[TMP4:%.*]] = call <4 x double> @llvm.floor.v4f64(<4 x double> [[TMP3]])
-; AVX2-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX2-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @floor_8f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @src64, align 8
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <8 x double> @llvm.floor.v8f64(<8 x double> [[TMP1]])
-; AVX512-NEXT:    store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 8
+; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 8
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-  %ld2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-  %ld3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-  %ld4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-  %ld5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-  %ld6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-  %ld7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+  %ld0 = load double, ptr @src64, align 8
+  %ld1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+  %ld2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+  %ld3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+  %ld4 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+  %ld5 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+  %ld6 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+  %ld7 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
   %floor0 = call double @llvm.floor.f64(double %ld0)
   %floor1 = call double @llvm.floor.f64(double %ld1)
   %floor2 = call double @llvm.floor.f64(double %ld2)
@@ -356,104 +356,104 @@ define void @floor_8f64() #0 {
   %floor5 = call double @llvm.floor.f64(double %ld5)
   %floor6 = call double @llvm.floor.f64(double %ld6)
   %floor7 = call double @llvm.floor.f64(double %ld7)
-  store double %floor0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %floor1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %floor2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-  store double %floor3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-  store double %floor4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 8
-  store double %floor5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-  store double %floor6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 8
-  store double %floor7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+  store double %floor0, ptr @dst64, align 8
+  store double %floor1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %floor2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+  store double %floor3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+  store double %floor4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
+  store double %floor5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+  store double %floor6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 8
+  store double %floor7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @nearbyint_2f64() #0 {
 ; SSE2-LABEL: @nearbyint_2f64(
-; SSE2-NEXT:    [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-; SSE2-NEXT:    [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+; SSE2-NEXT:    [[LD0:%.*]] = load double, ptr @src64, align 8
+; SSE2-NEXT:    [[LD1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
 ; SSE2-NEXT:    [[NEARBYINT0:%.*]] = call double @llvm.nearbyint.f64(double [[LD0]])
 ; SSE2-NEXT:    [[NEARBYINT1:%.*]] = call double @llvm.nearbyint.f64(double [[LD1]])
-; SSE2-NEXT:    store double [[NEARBYINT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-; SSE2-NEXT:    store double [[NEARBYINT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT:    store double [[NEARBYINT0]], ptr @dst64, align 8
+; SSE2-NEXT:    store double [[NEARBYINT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @nearbyint_2f64(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 8
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP1]])
-; SSE41-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 8
 ; SSE41-NEXT:    ret void
 ;
 ; AVX-LABEL: @nearbyint_2f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 8
 ; AVX-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP1]])
-; AVX-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; AVX-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 8
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+  %ld0 = load double, ptr @src64, align 8
+  %ld1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
   %nearbyint0 = call double @llvm.nearbyint.f64(double %ld0)
   %nearbyint1 = call double @llvm.nearbyint.f64(double %ld1)
-  store double %nearbyint0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %nearbyint1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %nearbyint0, ptr @dst64, align 8
+  store double %nearbyint1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @nearbyint_4f64() #0 {
 ; SSE2-LABEL: @nearbyint_4f64(
-; SSE2-NEXT:    [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-; SSE2-NEXT:    [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-; SSE2-NEXT:    [[LD2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-; SSE2-NEXT:    [[LD3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+; SSE2-NEXT:    [[LD0:%.*]] = load double, ptr @src64, align 8
+; SSE2-NEXT:    [[LD1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+; SSE2-NEXT:    [[LD2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+; SSE2-NEXT:    [[LD3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
 ; SSE2-NEXT:    [[NEARBYINT0:%.*]] = call double @llvm.nearbyint.f64(double [[LD0]])
 ; SSE2-NEXT:    [[NEARBYINT1:%.*]] = call double @llvm.nearbyint.f64(double [[LD1]])
 ; SSE2-NEXT:    [[NEARBYINT2:%.*]] = call double @llvm.nearbyint.f64(double [[LD2]])
 ; SSE2-NEXT:    [[NEARBYINT3:%.*]] = call double @llvm.nearbyint.f64(double [[LD3]])
-; SSE2-NEXT:    store double [[NEARBYINT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-; SSE2-NEXT:    store double [[NEARBYINT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; SSE2-NEXT:    store double [[NEARBYINT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-; SSE2-NEXT:    store double [[NEARBYINT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; SSE2-NEXT:    store double [[NEARBYINT0]], ptr @dst64, align 8
+; SSE2-NEXT:    store double [[NEARBYINT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT:    store double [[NEARBYINT2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+; SSE2-NEXT:    store double [[NEARBYINT3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @nearbyint_4f64(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 8
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP1]])
-; SSE41-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
-; SSE41-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 8
+; SSE41-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
 ; SSE41-NEXT:    [[TMP4:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP3]])
-; SSE41-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
 ; SSE41-NEXT:    ret void
 ;
 ; AVX-LABEL: @nearbyint_4f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
 ; AVX-NEXT:    [[TMP2:%.*]] = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> [[TMP1]])
-; AVX-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 8
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-  %ld2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-  %ld3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+  %ld0 = load double, ptr @src64, align 8
+  %ld1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+  %ld2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+  %ld3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
   %nearbyint0 = call double @llvm.nearbyint.f64(double %ld0)
   %nearbyint1 = call double @llvm.nearbyint.f64(double %ld1)
   %nearbyint2 = call double @llvm.nearbyint.f64(double %ld2)
   %nearbyint3 = call double @llvm.nearbyint.f64(double %ld3)
-  store double %nearbyint0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %nearbyint1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %nearbyint2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-  store double %nearbyint3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %nearbyint0, ptr @dst64, align 8
+  store double %nearbyint1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %nearbyint2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+  store double %nearbyint3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
   ret void
 }
 
 define void @nearbyint_8f64() #0 {
 ; SSE2-LABEL: @nearbyint_8f64(
-; SSE2-NEXT:    [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-; SSE2-NEXT:    [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-; SSE2-NEXT:    [[LD2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-; SSE2-NEXT:    [[LD3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-; SSE2-NEXT:    [[LD4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-; SSE2-NEXT:    [[LD5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-; SSE2-NEXT:    [[LD6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-; SSE2-NEXT:    [[LD7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+; SSE2-NEXT:    [[LD0:%.*]] = load double, ptr @src64, align 8
+; SSE2-NEXT:    [[LD1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+; SSE2-NEXT:    [[LD2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+; SSE2-NEXT:    [[LD3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+; SSE2-NEXT:    [[LD4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+; SSE2-NEXT:    [[LD5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+; SSE2-NEXT:    [[LD6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+; SSE2-NEXT:    [[LD7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
 ; SSE2-NEXT:    [[NEARBYINT0:%.*]] = call double @llvm.nearbyint.f64(double [[LD0]])
 ; SSE2-NEXT:    [[NEARBYINT1:%.*]] = call double @llvm.nearbyint.f64(double [[LD1]])
 ; SSE2-NEXT:    [[NEARBYINT2:%.*]] = call double @llvm.nearbyint.f64(double [[LD2]])
@@ -462,63 +462,63 @@ define void @nearbyint_8f64() #0 {
 ; SSE2-NEXT:    [[NEARBYINT5:%.*]] = call double @llvm.nearbyint.f64(double [[LD5]])
 ; SSE2-NEXT:    [[NEARBYINT6:%.*]] = call double @llvm.nearbyint.f64(double [[LD6]])
 ; SSE2-NEXT:    [[NEARBYINT7:%.*]] = call double @llvm.nearbyint.f64(double [[LD7]])
-; SSE2-NEXT:    store double [[NEARBYINT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-; SSE2-NEXT:    store double [[NEARBYINT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; SSE2-NEXT:    store double [[NEARBYINT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-; SSE2-NEXT:    store double [[NEARBYINT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-; SSE2-NEXT:    store double [[NEARBYINT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 8
-; SSE2-NEXT:    store double [[NEARBYINT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-; SSE2-NEXT:    store double [[NEARBYINT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 8
-; SSE2-NEXT:    store double [[NEARBYINT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+; SSE2-NEXT:    store double [[NEARBYINT0]], ptr @dst64, align 8
+; SSE2-NEXT:    store double [[NEARBYINT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT:    store double [[NEARBYINT2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+; SSE2-NEXT:    store double [[NEARBYINT3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+; SSE2-NEXT:    store double [[NEARBYINT4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
+; SSE2-NEXT:    store double [[NEARBYINT5]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+; SSE2-NEXT:    store double [[NEARBYINT6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 8
+; SSE2-NEXT:    store double [[NEARBYINT7]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @nearbyint_8f64(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 8
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP1]])
-; SSE41-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
-; SSE41-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 8
+; SSE41-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
 ; SSE41-NEXT:    [[TMP4:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP3]])
-; SSE41-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
-; SSE41-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+; SSE41-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
 ; SSE41-NEXT:    [[TMP6:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP5]])
-; SSE41-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 8
-; SSE41-NEXT:    [[TMP7:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
+; SSE41-NEXT:    [[TMP7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
 ; SSE41-NEXT:    [[TMP8:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP7]])
-; SSE41-NEXT:    store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 8
 ; SSE41-NEXT:    ret void
 ;
 ; AVX1-LABEL: @nearbyint_8f64(
-; AVX1-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX1-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
 ; AVX1-NEXT:    [[TMP2:%.*]] = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> [[TMP1]])
-; AVX1-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
-; AVX1-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX1-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 8
+; AVX1-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
 ; AVX1-NEXT:    [[TMP4:%.*]] = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> [[TMP3]])
-; AVX1-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX1-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @nearbyint_8f64(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
 ; AVX2-NEXT:    [[TMP2:%.*]] = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> [[TMP1]])
-; AVX2-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
-; AVX2-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX2-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 8
+; AVX2-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
 ; AVX2-NEXT:    [[TMP4:%.*]] = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> [[TMP3]])
-; AVX2-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX2-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @nearbyint_8f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @src64, align 8
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> [[TMP1]])
-; AVX512-NEXT:    store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 8
+; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 8
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-  %ld2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-  %ld3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-  %ld4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-  %ld5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-  %ld6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-  %ld7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+  %ld0 = load double, ptr @src64, align 8
+  %ld1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+  %ld2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+  %ld3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+  %ld4 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+  %ld5 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+  %ld6 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+  %ld7 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
   %nearbyint0 = call double @llvm.nearbyint.f64(double %ld0)
   %nearbyint1 = call double @llvm.nearbyint.f64(double %ld1)
   %nearbyint2 = call double @llvm.nearbyint.f64(double %ld2)
@@ -527,104 +527,104 @@ define void @nearbyint_8f64() #0 {
   %nearbyint5 = call double @llvm.nearbyint.f64(double %ld5)
   %nearbyint6 = call double @llvm.nearbyint.f64(double %ld6)
   %nearbyint7 = call double @llvm.nearbyint.f64(double %ld7)
-  store double %nearbyint0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %nearbyint1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %nearbyint2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-  store double %nearbyint3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-  store double %nearbyint4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 8
-  store double %nearbyint5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-  store double %nearbyint6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 8
-  store double %nearbyint7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+  store double %nearbyint0, ptr @dst64, align 8
+  store double %nearbyint1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %nearbyint2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+  store double %nearbyint3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+  store double %nearbyint4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
+  store double %nearbyint5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+  store double %nearbyint6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 8
+  store double %nearbyint7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
   ret void
 }
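
[Editor's note: the regenerated vector CHECK lines follow the same rule. With opaque pointers, the `bitcast` from `[8 x double]*` to a vector pointer type is no longer needed, so the wide loads and stores address the global (or a GEP into it) directly. A sketch of that pattern, again illustrative only:

  ; before: bitcast gives the access a <2 x double> pointee type
  %t = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8

  ; after: the vector access type comes from the instruction, so no cast is required
  %t = load <2 x double>, ptr @src64, align 8

This is why the conversion is NFC: only pointer-type bookkeeping changes, while every access type, offset, and alignment in the checked output stays the same.]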
 
 define void @rint_2f64() #0 {
 ; SSE2-LABEL: @rint_2f64(
-; SSE2-NEXT:    [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-; SSE2-NEXT:    [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+; SSE2-NEXT:    [[LD0:%.*]] = load double, ptr @src64, align 8
+; SSE2-NEXT:    [[LD1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
 ; SSE2-NEXT:    [[RINT0:%.*]] = call double @llvm.rint.f64(double [[LD0]])
 ; SSE2-NEXT:    [[RINT1:%.*]] = call double @llvm.rint.f64(double [[LD1]])
-; SSE2-NEXT:    store double [[RINT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-; SSE2-NEXT:    store double [[RINT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT:    store double [[RINT0]], ptr @dst64, align 8
+; SSE2-NEXT:    store double [[RINT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @rint_2f64(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 8
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[TMP1]])
-; SSE41-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 8
 ; SSE41-NEXT:    ret void
 ;
 ; AVX-LABEL: @rint_2f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 8
 ; AVX-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[TMP1]])
-; AVX-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; AVX-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 8
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+  %ld0 = load double, ptr @src64, align 8
+  %ld1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
   %rint0 = call double @llvm.rint.f64(double %ld0)
   %rint1 = call double @llvm.rint.f64(double %ld1)
-  store double %rint0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %rint1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %rint0, ptr @dst64, align 8
+  store double %rint1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @rint_4f64() #0 {
 ; SSE2-LABEL: @rint_4f64(
-; SSE2-NEXT:    [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-; SSE2-NEXT:    [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-; SSE2-NEXT:    [[LD2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-; SSE2-NEXT:    [[LD3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+; SSE2-NEXT:    [[LD0:%.*]] = load double, ptr @src64, align 8
+; SSE2-NEXT:    [[LD1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+; SSE2-NEXT:    [[LD2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+; SSE2-NEXT:    [[LD3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
 ; SSE2-NEXT:    [[RINT0:%.*]] = call double @llvm.rint.f64(double [[LD0]])
 ; SSE2-NEXT:    [[RINT1:%.*]] = call double @llvm.rint.f64(double [[LD1]])
 ; SSE2-NEXT:    [[RINT2:%.*]] = call double @llvm.rint.f64(double [[LD2]])
 ; SSE2-NEXT:    [[RINT3:%.*]] = call double @llvm.rint.f64(double [[LD3]])
-; SSE2-NEXT:    store double [[RINT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-; SSE2-NEXT:    store double [[RINT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; SSE2-NEXT:    store double [[RINT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-; SSE2-NEXT:    store double [[RINT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; SSE2-NEXT:    store double [[RINT0]], ptr @dst64, align 8
+; SSE2-NEXT:    store double [[RINT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT:    store double [[RINT2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+; SSE2-NEXT:    store double [[RINT3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @rint_4f64(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 8
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[TMP1]])
-; SSE41-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
-; SSE41-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 8
+; SSE41-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
 ; SSE41-NEXT:    [[TMP4:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[TMP3]])
-; SSE41-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
 ; SSE41-NEXT:    ret void
 ;
 ; AVX-LABEL: @rint_4f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
 ; AVX-NEXT:    [[TMP2:%.*]] = call <4 x double> @llvm.rint.v4f64(<4 x double> [[TMP1]])
-; AVX-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 8
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-  %ld2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-  %ld3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+  %ld0 = load double, ptr @src64, align 8
+  %ld1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+  %ld2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+  %ld3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
   %rint0 = call double @llvm.rint.f64(double %ld0)
   %rint1 = call double @llvm.rint.f64(double %ld1)
   %rint2 = call double @llvm.rint.f64(double %ld2)
   %rint3 = call double @llvm.rint.f64(double %ld3)
-  store double %rint0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %rint1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %rint2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-  store double %rint3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %rint0, ptr @dst64, align 8
+  store double %rint1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %rint2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+  store double %rint3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
   ret void
 }
 
 define void @rint_8f64() #0 {
 ; SSE2-LABEL: @rint_8f64(
-; SSE2-NEXT:    [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-; SSE2-NEXT:    [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-; SSE2-NEXT:    [[LD2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-; SSE2-NEXT:    [[LD3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-; SSE2-NEXT:    [[LD4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-; SSE2-NEXT:    [[LD5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-; SSE2-NEXT:    [[LD6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-; SSE2-NEXT:    [[LD7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+; SSE2-NEXT:    [[LD0:%.*]] = load double, ptr @src64, align 8
+; SSE2-NEXT:    [[LD1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+; SSE2-NEXT:    [[LD2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+; SSE2-NEXT:    [[LD3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+; SSE2-NEXT:    [[LD4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+; SSE2-NEXT:    [[LD5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+; SSE2-NEXT:    [[LD6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+; SSE2-NEXT:    [[LD7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
 ; SSE2-NEXT:    [[RINT0:%.*]] = call double @llvm.rint.f64(double [[LD0]])
 ; SSE2-NEXT:    [[RINT1:%.*]] = call double @llvm.rint.f64(double [[LD1]])
 ; SSE2-NEXT:    [[RINT2:%.*]] = call double @llvm.rint.f64(double [[LD2]])
@@ -633,63 +633,63 @@ define void @rint_8f64() #0 {
 ; SSE2-NEXT:    [[RINT5:%.*]] = call double @llvm.rint.f64(double [[LD5]])
 ; SSE2-NEXT:    [[RINT6:%.*]] = call double @llvm.rint.f64(double [[LD6]])
 ; SSE2-NEXT:    [[RINT7:%.*]] = call double @llvm.rint.f64(double [[LD7]])
-; SSE2-NEXT:    store double [[RINT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-; SSE2-NEXT:    store double [[RINT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; SSE2-NEXT:    store double [[RINT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-; SSE2-NEXT:    store double [[RINT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-; SSE2-NEXT:    store double [[RINT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 8
-; SSE2-NEXT:    store double [[RINT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-; SSE2-NEXT:    store double [[RINT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 8
-; SSE2-NEXT:    store double [[RINT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+; SSE2-NEXT:    store double [[RINT0]], ptr @dst64, align 8
+; SSE2-NEXT:    store double [[RINT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT:    store double [[RINT2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+; SSE2-NEXT:    store double [[RINT3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+; SSE2-NEXT:    store double [[RINT4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
+; SSE2-NEXT:    store double [[RINT5]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+; SSE2-NEXT:    store double [[RINT6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 8
+; SSE2-NEXT:    store double [[RINT7]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @rint_8f64(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 8
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[TMP1]])
-; SSE41-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
-; SSE41-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 8
+; SSE41-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
 ; SSE41-NEXT:    [[TMP4:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[TMP3]])
-; SSE41-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
-; SSE41-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+; SSE41-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
 ; SSE41-NEXT:    [[TMP6:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[TMP5]])
-; SSE41-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 8
-; SSE41-NEXT:    [[TMP7:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
+; SSE41-NEXT:    [[TMP7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
 ; SSE41-NEXT:    [[TMP8:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[TMP7]])
-; SSE41-NEXT:    store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 8
 ; SSE41-NEXT:    ret void
 ;
 ; AVX1-LABEL: @rint_8f64(
-; AVX1-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX1-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
 ; AVX1-NEXT:    [[TMP2:%.*]] = call <4 x double> @llvm.rint.v4f64(<4 x double> [[TMP1]])
-; AVX1-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
-; AVX1-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX1-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 8
+; AVX1-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
 ; AVX1-NEXT:    [[TMP4:%.*]] = call <4 x double> @llvm.rint.v4f64(<4 x double> [[TMP3]])
-; AVX1-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX1-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @rint_8f64(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
 ; AVX2-NEXT:    [[TMP2:%.*]] = call <4 x double> @llvm.rint.v4f64(<4 x double> [[TMP1]])
-; AVX2-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
-; AVX2-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX2-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 8
+; AVX2-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
 ; AVX2-NEXT:    [[TMP4:%.*]] = call <4 x double> @llvm.rint.v4f64(<4 x double> [[TMP3]])
-; AVX2-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX2-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @rint_8f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @src64, align 8
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <8 x double> @llvm.rint.v8f64(<8 x double> [[TMP1]])
-; AVX512-NEXT:    store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 8
+; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 8
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-  %ld2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-  %ld3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-  %ld4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-  %ld5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-  %ld6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-  %ld7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+  %ld0 = load double, ptr @src64, align 8
+  %ld1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+  %ld2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+  %ld3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+  %ld4 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+  %ld5 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+  %ld6 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+  %ld7 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
   %rint0 = call double @llvm.rint.f64(double %ld0)
   %rint1 = call double @llvm.rint.f64(double %ld1)
   %rint2 = call double @llvm.rint.f64(double %ld2)
@@ -698,104 +698,104 @@ define void @rint_8f64() #0 {
   %rint5 = call double @llvm.rint.f64(double %ld5)
   %rint6 = call double @llvm.rint.f64(double %ld6)
   %rint7 = call double @llvm.rint.f64(double %ld7)
-  store double %rint0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %rint1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %rint2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-  store double %rint3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-  store double %rint4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 8
-  store double %rint5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-  store double %rint6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 8
-  store double %rint7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+  store double %rint0, ptr @dst64, align 8
+  store double %rint1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %rint2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+  store double %rint3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+  store double %rint4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
+  store double %rint5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+  store double %rint6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 8
+  store double %rint7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @trunc_2f64() #0 {
 ; SSE2-LABEL: @trunc_2f64(
-; SSE2-NEXT:    [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-; SSE2-NEXT:    [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+; SSE2-NEXT:    [[LD0:%.*]] = load double, ptr @src64, align 8
+; SSE2-NEXT:    [[LD1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
 ; SSE2-NEXT:    [[TRUNC0:%.*]] = call double @llvm.trunc.f64(double [[LD0]])
 ; SSE2-NEXT:    [[TRUNC1:%.*]] = call double @llvm.trunc.f64(double [[LD1]])
-; SSE2-NEXT:    store double [[TRUNC0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-; SSE2-NEXT:    store double [[TRUNC1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT:    store double [[TRUNC0]], ptr @dst64, align 8
+; SSE2-NEXT:    store double [[TRUNC1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @trunc_2f64(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 8
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP1]])
-; SSE41-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 8
 ; SSE41-NEXT:    ret void
 ;
 ; AVX-LABEL: @trunc_2f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 8
 ; AVX-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP1]])
-; AVX-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; AVX-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 8
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+  %ld0 = load double, ptr @src64, align 8
+  %ld1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
   %trunc0 = call double @llvm.trunc.f64(double %ld0)
   %trunc1 = call double @llvm.trunc.f64(double %ld1)
-  store double %trunc0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %trunc1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %trunc0, ptr @dst64, align 8
+  store double %trunc1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @trunc_4f64() #0 {
 ; SSE2-LABEL: @trunc_4f64(
-; SSE2-NEXT:    [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-; SSE2-NEXT:    [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-; SSE2-NEXT:    [[LD2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-; SSE2-NEXT:    [[LD3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+; SSE2-NEXT:    [[LD0:%.*]] = load double, ptr @src64, align 8
+; SSE2-NEXT:    [[LD1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+; SSE2-NEXT:    [[LD2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+; SSE2-NEXT:    [[LD3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
 ; SSE2-NEXT:    [[TRUNC0:%.*]] = call double @llvm.trunc.f64(double [[LD0]])
 ; SSE2-NEXT:    [[TRUNC1:%.*]] = call double @llvm.trunc.f64(double [[LD1]])
 ; SSE2-NEXT:    [[TRUNC2:%.*]] = call double @llvm.trunc.f64(double [[LD2]])
 ; SSE2-NEXT:    [[TRUNC3:%.*]] = call double @llvm.trunc.f64(double [[LD3]])
-; SSE2-NEXT:    store double [[TRUNC0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-; SSE2-NEXT:    store double [[TRUNC1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; SSE2-NEXT:    store double [[TRUNC2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-; SSE2-NEXT:    store double [[TRUNC3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; SSE2-NEXT:    store double [[TRUNC0]], ptr @dst64, align 8
+; SSE2-NEXT:    store double [[TRUNC1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT:    store double [[TRUNC2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+; SSE2-NEXT:    store double [[TRUNC3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @trunc_4f64(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 8
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP1]])
-; SSE41-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
-; SSE41-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 8
+; SSE41-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
 ; SSE41-NEXT:    [[TMP4:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP3]])
-; SSE41-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
 ; SSE41-NEXT:    ret void
 ;
 ; AVX-LABEL: @trunc_4f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
 ; AVX-NEXT:    [[TMP2:%.*]] = call <4 x double> @llvm.trunc.v4f64(<4 x double> [[TMP1]])
-; AVX-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 8
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-  %ld2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-  %ld3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+  %ld0 = load double, ptr @src64, align 8
+  %ld1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+  %ld2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+  %ld3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
   %trunc0 = call double @llvm.trunc.f64(double %ld0)
   %trunc1 = call double @llvm.trunc.f64(double %ld1)
   %trunc2 = call double @llvm.trunc.f64(double %ld2)
   %trunc3 = call double @llvm.trunc.f64(double %ld3)
-  store double %trunc0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %trunc1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %trunc2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-  store double %trunc3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %trunc0, ptr @dst64, align 8
+  store double %trunc1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %trunc2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+  store double %trunc3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
   ret void
 }
 
 define void @trunc_8f64() #0 {
 ; SSE2-LABEL: @trunc_8f64(
-; SSE2-NEXT:    [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-; SSE2-NEXT:    [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-; SSE2-NEXT:    [[LD2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-; SSE2-NEXT:    [[LD3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-; SSE2-NEXT:    [[LD4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-; SSE2-NEXT:    [[LD5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-; SSE2-NEXT:    [[LD6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-; SSE2-NEXT:    [[LD7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+; SSE2-NEXT:    [[LD0:%.*]] = load double, ptr @src64, align 8
+; SSE2-NEXT:    [[LD1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+; SSE2-NEXT:    [[LD2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+; SSE2-NEXT:    [[LD3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+; SSE2-NEXT:    [[LD4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+; SSE2-NEXT:    [[LD5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+; SSE2-NEXT:    [[LD6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+; SSE2-NEXT:    [[LD7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
 ; SSE2-NEXT:    [[TRUNC0:%.*]] = call double @llvm.trunc.f64(double [[LD0]])
 ; SSE2-NEXT:    [[TRUNC1:%.*]] = call double @llvm.trunc.f64(double [[LD1]])
 ; SSE2-NEXT:    [[TRUNC2:%.*]] = call double @llvm.trunc.f64(double [[LD2]])
@@ -804,63 +804,63 @@ define void @trunc_8f64() #0 {
 ; SSE2-NEXT:    [[TRUNC5:%.*]] = call double @llvm.trunc.f64(double [[LD5]])
 ; SSE2-NEXT:    [[TRUNC6:%.*]] = call double @llvm.trunc.f64(double [[LD6]])
 ; SSE2-NEXT:    [[TRUNC7:%.*]] = call double @llvm.trunc.f64(double [[LD7]])
-; SSE2-NEXT:    store double [[TRUNC0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-; SSE2-NEXT:    store double [[TRUNC1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; SSE2-NEXT:    store double [[TRUNC2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-; SSE2-NEXT:    store double [[TRUNC3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-; SSE2-NEXT:    store double [[TRUNC4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 8
-; SSE2-NEXT:    store double [[TRUNC5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-; SSE2-NEXT:    store double [[TRUNC6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 8
-; SSE2-NEXT:    store double [[TRUNC7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+; SSE2-NEXT:    store double [[TRUNC0]], ptr @dst64, align 8
+; SSE2-NEXT:    store double [[TRUNC1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT:    store double [[TRUNC2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+; SSE2-NEXT:    store double [[TRUNC3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+; SSE2-NEXT:    store double [[TRUNC4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
+; SSE2-NEXT:    store double [[TRUNC5]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+; SSE2-NEXT:    store double [[TRUNC6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 8
+; SSE2-NEXT:    store double [[TRUNC7]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @trunc_8f64(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 8
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP1]])
-; SSE41-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
-; SSE41-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 8
+; SSE41-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
 ; SSE41-NEXT:    [[TMP4:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP3]])
-; SSE41-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
-; SSE41-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+; SSE41-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
 ; SSE41-NEXT:    [[TMP6:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP5]])
-; SSE41-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 8
-; SSE41-NEXT:    [[TMP7:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
+; SSE41-NEXT:    [[TMP7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
 ; SSE41-NEXT:    [[TMP8:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP7]])
-; SSE41-NEXT:    store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 8
 ; SSE41-NEXT:    ret void
 ;
 ; AVX1-LABEL: @trunc_8f64(
-; AVX1-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX1-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
 ; AVX1-NEXT:    [[TMP2:%.*]] = call <4 x double> @llvm.trunc.v4f64(<4 x double> [[TMP1]])
-; AVX1-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
-; AVX1-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX1-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 8
+; AVX1-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
 ; AVX1-NEXT:    [[TMP4:%.*]] = call <4 x double> @llvm.trunc.v4f64(<4 x double> [[TMP3]])
-; AVX1-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX1-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @trunc_8f64(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
 ; AVX2-NEXT:    [[TMP2:%.*]] = call <4 x double> @llvm.trunc.v4f64(<4 x double> [[TMP1]])
-; AVX2-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
-; AVX2-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX2-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 8
+; AVX2-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
 ; AVX2-NEXT:    [[TMP4:%.*]] = call <4 x double> @llvm.trunc.v4f64(<4 x double> [[TMP3]])
-; AVX2-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX2-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @trunc_8f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @src64, align 8
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <8 x double> @llvm.trunc.v8f64(<8 x double> [[TMP1]])
-; AVX512-NEXT:    store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 8
+; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 8
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-  %ld2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-  %ld3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-  %ld4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-  %ld5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-  %ld6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-  %ld7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+  %ld0 = load double, ptr @src64, align 8
+  %ld1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+  %ld2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+  %ld3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
+  %ld4 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 8
+  %ld5 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 8
+  %ld6 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 8
+  %ld7 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 8
   %trunc0 = call double @llvm.trunc.f64(double %ld0)
   %trunc1 = call double @llvm.trunc.f64(double %ld1)
   %trunc2 = call double @llvm.trunc.f64(double %ld2)
@@ -869,70 +869,70 @@ define void @trunc_8f64() #0 {
   %trunc5 = call double @llvm.trunc.f64(double %ld5)
   %trunc6 = call double @llvm.trunc.f64(double %ld6)
   %trunc7 = call double @llvm.trunc.f64(double %ld7)
-  store double %trunc0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %trunc1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %trunc2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-  store double %trunc3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-  store double %trunc4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 8
-  store double %trunc5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-  store double %trunc6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 8
-  store double %trunc7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+  store double %trunc0, ptr @dst64, align 8
+  store double %trunc1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %trunc2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+  store double %trunc3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+  store double %trunc4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 8
+  store double %trunc5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+  store double %trunc6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 8
+  store double %trunc7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @ceil_4f32() #0 {
 ; SSE2-LABEL: @ceil_4f32(
-; SSE2-NEXT:    [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-; SSE2-NEXT:    [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-; SSE2-NEXT:    [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-; SSE2-NEXT:    [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+; SSE2-NEXT:    [[LD0:%.*]] = load float, ptr @src32, align 4
+; SSE2-NEXT:    [[LD1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+; SSE2-NEXT:    [[LD2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+; SSE2-NEXT:    [[LD3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
 ; SSE2-NEXT:    [[CEIL0:%.*]] = call float @llvm.ceil.f32(float [[LD0]])
 ; SSE2-NEXT:    [[CEIL1:%.*]] = call float @llvm.ceil.f32(float [[LD1]])
 ; SSE2-NEXT:    [[CEIL2:%.*]] = call float @llvm.ceil.f32(float [[LD2]])
 ; SSE2-NEXT:    [[CEIL3:%.*]] = call float @llvm.ceil.f32(float [[LD3]])
-; SSE2-NEXT:    store float [[CEIL0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-; SSE2-NEXT:    store float [[CEIL1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-; SSE2-NEXT:    store float [[CEIL2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-; SSE2-NEXT:    store float [[CEIL3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT:    store float [[CEIL0]], ptr @dst32, align 4
+; SSE2-NEXT:    store float [[CEIL1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT:    store float [[CEIL2]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT:    store float [[CEIL3]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @ceil_4f32(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP1]])
-; SSE41-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
 ; SSE41-NEXT:    ret void
 ;
 ; AVX-LABEL: @ceil_4f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP1]])
-; AVX-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; AVX-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %ld1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %ld2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %ld3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+  %ld0 = load float, ptr @src32, align 4
+  %ld1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %ld3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
   %ceil0 = call float @llvm.ceil.f32(float %ld0)
   %ceil1 = call float @llvm.ceil.f32(float %ld1)
   %ceil2 = call float @llvm.ceil.f32(float %ld2)
   %ceil3 = call float @llvm.ceil.f32(float %ld3)
-  store float %ceil0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-  store float %ceil1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %ceil2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-  store float %ceil3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %ceil0, ptr @dst32, align 4
+  store float %ceil1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %ceil2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+  store float %ceil3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @ceil_8f32() #0 {
 ; SSE2-LABEL: @ceil_8f32(
-; SSE2-NEXT:    [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-; SSE2-NEXT:    [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-; SSE2-NEXT:    [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-; SSE2-NEXT:    [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-; SSE2-NEXT:    [[LD4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-; SSE2-NEXT:    [[LD5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-; SSE2-NEXT:    [[LD6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-; SSE2-NEXT:    [[LD7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+; SSE2-NEXT:    [[LD0:%.*]] = load float, ptr @src32, align 4
+; SSE2-NEXT:    [[LD1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+; SSE2-NEXT:    [[LD2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+; SSE2-NEXT:    [[LD3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+; SSE2-NEXT:    [[LD4:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+; SSE2-NEXT:    [[LD5:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+; SSE2-NEXT:    [[LD6:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+; SSE2-NEXT:    [[LD7:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
 ; SSE2-NEXT:    [[CEIL0:%.*]] = call float @llvm.ceil.f32(float [[LD0]])
 ; SSE2-NEXT:    [[CEIL1:%.*]] = call float @llvm.ceil.f32(float [[LD1]])
 ; SSE2-NEXT:    [[CEIL2:%.*]] = call float @llvm.ceil.f32(float [[LD2]])
@@ -941,39 +941,39 @@ define void @ceil_8f32() #0 {
 ; SSE2-NEXT:    [[CEIL5:%.*]] = call float @llvm.ceil.f32(float [[LD5]])
 ; SSE2-NEXT:    [[CEIL6:%.*]] = call float @llvm.ceil.f32(float [[LD6]])
 ; SSE2-NEXT:    [[CEIL7:%.*]] = call float @llvm.ceil.f32(float [[LD7]])
-; SSE2-NEXT:    store float [[CEIL0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-; SSE2-NEXT:    store float [[CEIL1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-; SSE2-NEXT:    store float [[CEIL2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-; SSE2-NEXT:    store float [[CEIL3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-; SSE2-NEXT:    store float [[CEIL4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
-; SSE2-NEXT:    store float [[CEIL5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-; SSE2-NEXT:    store float [[CEIL6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
-; SSE2-NEXT:    store float [[CEIL7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+; SSE2-NEXT:    store float [[CEIL0]], ptr @dst32, align 4
+; SSE2-NEXT:    store float [[CEIL1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT:    store float [[CEIL2]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT:    store float [[CEIL3]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT:    store float [[CEIL4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+; SSE2-NEXT:    store float [[CEIL5]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+; SSE2-NEXT:    store float [[CEIL6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 4
+; SSE2-NEXT:    store float [[CEIL7]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @ceil_8f32(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP1]])
-; SSE41-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
-; SSE41-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
+; SSE41-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
 ; SSE41-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP3]])
-; SSE41-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
 ; SSE41-NEXT:    ret void
 ;
 ; AVX-LABEL: @ceil_8f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.ceil.v8f32(<8 x float> [[TMP1]])
-; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 4
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %ld1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %ld2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %ld3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-  %ld4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-  %ld5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-  %ld6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-  %ld7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+  %ld0 = load float, ptr @src32, align 4
+  %ld1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %ld3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+  %ld4 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+  %ld5 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+  %ld6 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+  %ld7 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
   %ceil0 = call float @llvm.ceil.f32(float %ld0)
   %ceil1 = call float @llvm.ceil.f32(float %ld1)
   %ceil2 = call float @llvm.ceil.f32(float %ld2)
@@ -982,35 +982,35 @@ define void @ceil_8f32() #0 {
   %ceil5 = call float @llvm.ceil.f32(float %ld5)
   %ceil6 = call float @llvm.ceil.f32(float %ld6)
   %ceil7 = call float @llvm.ceil.f32(float %ld7)
-  store float %ceil0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-  store float %ceil1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %ceil2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-  store float %ceil3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-  store float %ceil4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
-  store float %ceil5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-  store float %ceil6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
-  store float %ceil7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  store float %ceil0, ptr @dst32, align 4
+  store float %ceil1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %ceil2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+  store float %ceil3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+  store float %ceil4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+  store float %ceil5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+  store float %ceil6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 4
+  store float %ceil7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @ceil_16f32() #0 {
 ; SSE2-LABEL: @ceil_16f32(
-; SSE2-NEXT:    [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-; SSE2-NEXT:    [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-; SSE2-NEXT:    [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-; SSE2-NEXT:    [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-; SSE2-NEXT:    [[LD4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-; SSE2-NEXT:    [[LD5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-; SSE2-NEXT:    [[LD6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-; SSE2-NEXT:    [[LD7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
-; SSE2-NEXT:    [[LD8:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8), align 4
-; SSE2-NEXT:    [[LD9:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 9), align 4
-; SSE2-NEXT:    [[LD10:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 10), align 4
-; SSE2-NEXT:    [[LD11:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 11), align 4
-; SSE2-NEXT:    [[LD12:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12), align 4
-; SSE2-NEXT:    [[LD13:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 13), align 4
-; SSE2-NEXT:    [[LD14:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 14), align 4
-; SSE2-NEXT:    [[LD15:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 15), align 4
+; SSE2-NEXT:    [[LD0:%.*]] = load float, ptr @src32, align 4
+; SSE2-NEXT:    [[LD1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+; SSE2-NEXT:    [[LD2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+; SSE2-NEXT:    [[LD3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+; SSE2-NEXT:    [[LD4:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+; SSE2-NEXT:    [[LD5:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+; SSE2-NEXT:    [[LD6:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+; SSE2-NEXT:    [[LD7:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
+; SSE2-NEXT:    [[LD8:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8), align 4
+; SSE2-NEXT:    [[LD9:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 9), align 4
+; SSE2-NEXT:    [[LD10:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 10), align 4
+; SSE2-NEXT:    [[LD11:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 11), align 4
+; SSE2-NEXT:    [[LD12:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 12), align 4
+; SSE2-NEXT:    [[LD13:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 13), align 4
+; SSE2-NEXT:    [[LD14:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 14), align 4
+; SSE2-NEXT:    [[LD15:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 15), align 4
 ; SSE2-NEXT:    [[CEIL0:%.*]] = call float @llvm.ceil.f32(float [[LD0]])
 ; SSE2-NEXT:    [[CEIL1:%.*]] = call float @llvm.ceil.f32(float [[LD1]])
 ; SSE2-NEXT:    [[CEIL2:%.*]] = call float @llvm.ceil.f32(float [[LD2]])
@@ -1027,79 +1027,79 @@ define void @ceil_16f32() #0 {
 ; SSE2-NEXT:    [[CEIL13:%.*]] = call float @llvm.ceil.f32(float [[LD13]])
 ; SSE2-NEXT:    [[CEIL14:%.*]] = call float @llvm.ceil.f32(float [[LD14]])
 ; SSE2-NEXT:    [[CEIL15:%.*]] = call float @llvm.ceil.f32(float [[LD15]])
-; SSE2-NEXT:    store float [[CEIL0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-; SSE2-NEXT:    store float [[CEIL1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-; SSE2-NEXT:    store float [[CEIL2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-; SSE2-NEXT:    store float [[CEIL3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-; SSE2-NEXT:    store float [[CEIL4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
-; SSE2-NEXT:    store float [[CEIL5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-; SSE2-NEXT:    store float [[CEIL6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
-; SSE2-NEXT:    store float [[CEIL7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
-; SSE2-NEXT:    store float [[CEIL8]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8), align 4
-; SSE2-NEXT:    store float [[CEIL9]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9), align 4
-; SSE2-NEXT:    store float [[CEIL10]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
-; SSE2-NEXT:    store float [[CEIL11]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-; SSE2-NEXT:    store float [[CEIL12]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
-; SSE2-NEXT:    store float [[CEIL13]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-; SSE2-NEXT:    store float [[CEIL14]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
-; SSE2-NEXT:    store float [[CEIL15]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+; SSE2-NEXT:    store float [[CEIL0]], ptr @dst32, align 4
+; SSE2-NEXT:    store float [[CEIL1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT:    store float [[CEIL2]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT:    store float [[CEIL3]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT:    store float [[CEIL4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+; SSE2-NEXT:    store float [[CEIL5]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+; SSE2-NEXT:    store float [[CEIL6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 4
+; SSE2-NEXT:    store float [[CEIL7]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
+; SSE2-NEXT:    store float [[CEIL8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
+; SSE2-NEXT:    store float [[CEIL9]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9), align 4
+; SSE2-NEXT:    store float [[CEIL10]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 4
+; SSE2-NEXT:    store float [[CEIL11]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+; SSE2-NEXT:    store float [[CEIL12]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
+; SSE2-NEXT:    store float [[CEIL13]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+; SSE2-NEXT:    store float [[CEIL14]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 4
+; SSE2-NEXT:    store float [[CEIL15]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @ceil_16f32(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP1]])
-; SSE41-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
-; SSE41-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
+; SSE41-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
 ; SSE41-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP3]])
-; SSE41-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
-; SSE41-NEXT:    [[TMP5:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+; SSE41-NEXT:    [[TMP5:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8), align 4
 ; SSE41-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP5]])
-; SSE41-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 4
-; SSE41-NEXT:    [[TMP7:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
+; SSE41-NEXT:    [[TMP7:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 12), align 4
 ; SSE41-NEXT:    [[TMP8:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP7]])
-; SSE41-NEXT:    store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
 ; SSE41-NEXT:    ret void
 ;
 ; AVX1-LABEL: @ceil_16f32(
-; AVX1-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX1-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX1-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.ceil.v8f32(<8 x float> [[TMP1]])
-; AVX1-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
-; AVX1-NEXT:    [[TMP3:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX1-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 4
+; AVX1-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8), align 4
 ; AVX1-NEXT:    [[TMP4:%.*]] = call <8 x float> @llvm.ceil.v8f32(<8 x float> [[TMP3]])
-; AVX1-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX1-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @ceil_16f32(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX2-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.ceil.v8f32(<8 x float> [[TMP1]])
-; AVX2-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
-; AVX2-NEXT:    [[TMP3:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX2-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 4
+; AVX2-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8), align 4
 ; AVX2-NEXT:    [[TMP4:%.*]] = call <8 x float> @llvm.ceil.v8f32(<8 x float> [[TMP3]])
-; AVX2-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX2-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @ceil_16f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @src32 to <16 x float>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, ptr @src32, align 4
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <16 x float> @llvm.ceil.v16f32(<16 x float> [[TMP1]])
-; AVX512-NEXT:    store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 4
+; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %ld0  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0 ), align 4
-  %ld1  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1 ), align 4
-  %ld2  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2 ), align 4
-  %ld3  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3 ), align 4
-  %ld4  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4 ), align 4
-  %ld5  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5 ), align 4
-  %ld6  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6 ), align 4
-  %ld7  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7 ), align 4
-  %ld8  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8 ), align 4
-  %ld9  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 9 ), align 4
-  %ld10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 10), align 4
-  %ld11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 11), align 4
-  %ld12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12), align 4
-  %ld13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 13), align 4
-  %ld14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 14), align 4
-  %ld15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 15), align 4
+  %ld0  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 0 ), align 4
+  %ld1  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1 ), align 4
+  %ld2  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2 ), align 4
+  %ld3  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3 ), align 4
+  %ld4  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4 ), align 4
+  %ld5  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5 ), align 4
+  %ld6  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6 ), align 4
+  %ld7  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7 ), align 4
+  %ld8  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8 ), align 4
+  %ld9  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 9 ), align 4
+  %ld10 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 10), align 4
+  %ld11 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 11), align 4
+  %ld12 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 12), align 4
+  %ld13 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 13), align 4
+  %ld14 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 14), align 4
+  %ld15 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 15), align 4
   %ceil0  = call float @llvm.ceil.f32(float %ld0 )
   %ceil1  = call float @llvm.ceil.f32(float %ld1 )
   %ceil2  = call float @llvm.ceil.f32(float %ld2 )
@@ -1116,78 +1116,78 @@ define void @ceil_16f32() #0 {
   %ceil13 = call float @llvm.ceil.f32(float %ld13)
   %ceil14 = call float @llvm.ceil.f32(float %ld14)
   %ceil15 = call float @llvm.ceil.f32(float %ld15)
-  store float %ceil0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 4
-  store float %ceil1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
-  store float %ceil2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 4
-  store float %ceil3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
-  store float %ceil4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 4
-  store float %ceil5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
-  store float %ceil6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 4
-  store float %ceil7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
-  store float %ceil8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 4
-  store float %ceil9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
-  store float %ceil10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
-  store float %ceil11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-  store float %ceil12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
-  store float %ceil13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-  store float %ceil14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
-  store float %ceil15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+  store float %ceil0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 4
+  store float %ceil1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
+  store float %ceil2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 4
+  store float %ceil3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
+  store float %ceil4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 4
+  store float %ceil5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
+  store float %ceil6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 4
+  store float %ceil7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
+  store float %ceil8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 4
+  store float %ceil9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
+  store float %ceil10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 4
+  store float %ceil11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+  store float %ceil12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
+  store float %ceil13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+  store float %ceil14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 4
+  store float %ceil15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
   ret void
 }
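; The same mechanical rewrite repeats throughout this file; a minimal
; before/after sketch of it (illustrative only, not copied verbatim from
; any one test):
;
;   before (typed pointers):
;     %v = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
;   after (opaque pointers):
;     %v = load <4 x float>, ptr @src32, align 4
;
; With opaque pointers every pointer value has type ptr, so bitcasts between
; pointer types become meaningless and the constant-expression bitcasts
; wrapping the globals drop out of both the input IR and the CHECK lines.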
 
 define void @floor_4f32() #0 {
 ; SSE2-LABEL: @floor_4f32(
-; SSE2-NEXT:    [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-; SSE2-NEXT:    [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-; SSE2-NEXT:    [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-; SSE2-NEXT:    [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+; SSE2-NEXT:    [[LD0:%.*]] = load float, ptr @src32, align 4
+; SSE2-NEXT:    [[LD1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+; SSE2-NEXT:    [[LD2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+; SSE2-NEXT:    [[LD3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
 ; SSE2-NEXT:    [[FLOOR0:%.*]] = call float @llvm.floor.f32(float [[LD0]])
 ; SSE2-NEXT:    [[FLOOR1:%.*]] = call float @llvm.floor.f32(float [[LD1]])
 ; SSE2-NEXT:    [[FLOOR2:%.*]] = call float @llvm.floor.f32(float [[LD2]])
 ; SSE2-NEXT:    [[FLOOR3:%.*]] = call float @llvm.floor.f32(float [[LD3]])
-; SSE2-NEXT:    store float [[FLOOR0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-; SSE2-NEXT:    store float [[FLOOR1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-; SSE2-NEXT:    store float [[FLOOR2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-; SSE2-NEXT:    store float [[FLOOR3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT:    store float [[FLOOR0]], ptr @dst32, align 4
+; SSE2-NEXT:    store float [[FLOOR1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT:    store float [[FLOOR2]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT:    store float [[FLOOR3]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @floor_4f32(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP1]])
-; SSE41-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
 ; SSE41-NEXT:    ret void
 ;
 ; AVX-LABEL: @floor_4f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP1]])
-; AVX-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; AVX-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %ld1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %ld2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %ld3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+  %ld0 = load float, ptr @src32, align 4
+  %ld1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %ld3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
   %floor0 = call float @llvm.floor.f32(float %ld0)
   %floor1 = call float @llvm.floor.f32(float %ld1)
   %floor2 = call float @llvm.floor.f32(float %ld2)
   %floor3 = call float @llvm.floor.f32(float %ld3)
-  store float %floor0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-  store float %floor1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %floor2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-  store float %floor3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %floor0, ptr @dst32, align 4
+  store float %floor1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %floor2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+  store float %floor3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
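; A second recurring pattern: a constant getelementptr whose indices are all
; zero is just its base pointer, so the element-0 accesses print as the bare
; global in the updated CHECK lines. Sketch (same shape as the stores above):
;
;   before: store float %x, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
;   after:  store float %x, ptr @dst32, align 4
;
; Accesses at non-zero offsets keep their getelementptr constant expression,
; now written over ptr, e.g.
;   getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1)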
 
 define void @floor_8f32() #0 {
 ; SSE2-LABEL: @floor_8f32(
-; SSE2-NEXT:    [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-; SSE2-NEXT:    [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-; SSE2-NEXT:    [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-; SSE2-NEXT:    [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-; SSE2-NEXT:    [[LD4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-; SSE2-NEXT:    [[LD5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-; SSE2-NEXT:    [[LD6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-; SSE2-NEXT:    [[LD7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+; SSE2-NEXT:    [[LD0:%.*]] = load float, ptr @src32, align 4
+; SSE2-NEXT:    [[LD1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+; SSE2-NEXT:    [[LD2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+; SSE2-NEXT:    [[LD3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+; SSE2-NEXT:    [[LD4:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+; SSE2-NEXT:    [[LD5:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+; SSE2-NEXT:    [[LD6:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+; SSE2-NEXT:    [[LD7:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
 ; SSE2-NEXT:    [[FLOOR0:%.*]] = call float @llvm.floor.f32(float [[LD0]])
 ; SSE2-NEXT:    [[FLOOR1:%.*]] = call float @llvm.floor.f32(float [[LD1]])
 ; SSE2-NEXT:    [[FLOOR2:%.*]] = call float @llvm.floor.f32(float [[LD2]])
@@ -1196,39 +1196,39 @@ define void @floor_8f32() #0 {
 ; SSE2-NEXT:    [[FLOOR5:%.*]] = call float @llvm.floor.f32(float [[LD5]])
 ; SSE2-NEXT:    [[FLOOR6:%.*]] = call float @llvm.floor.f32(float [[LD6]])
 ; SSE2-NEXT:    [[FLOOR7:%.*]] = call float @llvm.floor.f32(float [[LD7]])
-; SSE2-NEXT:    store float [[FLOOR0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-; SSE2-NEXT:    store float [[FLOOR1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-; SSE2-NEXT:    store float [[FLOOR2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-; SSE2-NEXT:    store float [[FLOOR3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-; SSE2-NEXT:    store float [[FLOOR4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
-; SSE2-NEXT:    store float [[FLOOR5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-; SSE2-NEXT:    store float [[FLOOR6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
-; SSE2-NEXT:    store float [[FLOOR7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+; SSE2-NEXT:    store float [[FLOOR0]], ptr @dst32, align 4
+; SSE2-NEXT:    store float [[FLOOR1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT:    store float [[FLOOR2]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT:    store float [[FLOOR3]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT:    store float [[FLOOR4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+; SSE2-NEXT:    store float [[FLOOR5]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+; SSE2-NEXT:    store float [[FLOOR6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 4
+; SSE2-NEXT:    store float [[FLOOR7]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @floor_8f32(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP1]])
-; SSE41-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
-; SSE41-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
+; SSE41-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
 ; SSE41-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP3]])
-; SSE41-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
 ; SSE41-NEXT:    ret void
 ;
 ; AVX-LABEL: @floor_8f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.floor.v8f32(<8 x float> [[TMP1]])
-; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 4
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %ld1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %ld2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %ld3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-  %ld4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-  %ld5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-  %ld6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-  %ld7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+  %ld0 = load float, ptr @src32, align 4
+  %ld1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %ld3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+  %ld4 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+  %ld5 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+  %ld6 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+  %ld7 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
   %floor0 = call float @llvm.floor.f32(float %ld0)
   %floor1 = call float @llvm.floor.f32(float %ld1)
   %floor2 = call float @llvm.floor.f32(float %ld2)
@@ -1237,35 +1237,35 @@ define void @floor_8f32() #0 {
   %floor5 = call float @llvm.floor.f32(float %ld5)
   %floor6 = call float @llvm.floor.f32(float %ld6)
   %floor7 = call float @llvm.floor.f32(float %ld7)
-  store float %floor0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-  store float %floor1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %floor2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-  store float %floor3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-  store float %floor4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
-  store float %floor5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-  store float %floor6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
-  store float %floor7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  store float %floor0, ptr @dst32, align 4
+  store float %floor1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %floor2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+  store float %floor3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+  store float %floor4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+  store float %floor5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+  store float %floor6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 4
+  store float %floor7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @floor_16f32() #0 {
 ; SSE2-LABEL: @floor_16f32(
-; SSE2-NEXT:    [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-; SSE2-NEXT:    [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-; SSE2-NEXT:    [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-; SSE2-NEXT:    [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-; SSE2-NEXT:    [[LD4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-; SSE2-NEXT:    [[LD5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-; SSE2-NEXT:    [[LD6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-; SSE2-NEXT:    [[LD7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
-; SSE2-NEXT:    [[LD8:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8), align 4
-; SSE2-NEXT:    [[LD9:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 9), align 4
-; SSE2-NEXT:    [[LD10:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 10), align 4
-; SSE2-NEXT:    [[LD11:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 11), align 4
-; SSE2-NEXT:    [[LD12:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12), align 4
-; SSE2-NEXT:    [[LD13:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 13), align 4
-; SSE2-NEXT:    [[LD14:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 14), align 4
-; SSE2-NEXT:    [[LD15:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 15), align 4
+; SSE2-NEXT:    [[LD0:%.*]] = load float, ptr @src32, align 4
+; SSE2-NEXT:    [[LD1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+; SSE2-NEXT:    [[LD2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+; SSE2-NEXT:    [[LD3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+; SSE2-NEXT:    [[LD4:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+; SSE2-NEXT:    [[LD5:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+; SSE2-NEXT:    [[LD6:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+; SSE2-NEXT:    [[LD7:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
+; SSE2-NEXT:    [[LD8:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8), align 4
+; SSE2-NEXT:    [[LD9:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 9), align 4
+; SSE2-NEXT:    [[LD10:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 10), align 4
+; SSE2-NEXT:    [[LD11:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 11), align 4
+; SSE2-NEXT:    [[LD12:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 12), align 4
+; SSE2-NEXT:    [[LD13:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 13), align 4
+; SSE2-NEXT:    [[LD14:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 14), align 4
+; SSE2-NEXT:    [[LD15:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 15), align 4
 ; SSE2-NEXT:    [[FLOOR0:%.*]] = call float @llvm.floor.f32(float [[LD0]])
 ; SSE2-NEXT:    [[FLOOR1:%.*]] = call float @llvm.floor.f32(float [[LD1]])
 ; SSE2-NEXT:    [[FLOOR2:%.*]] = call float @llvm.floor.f32(float [[LD2]])
@@ -1282,79 +1282,79 @@ define void @floor_16f32() #0 {
 ; SSE2-NEXT:    [[FLOOR13:%.*]] = call float @llvm.floor.f32(float [[LD13]])
 ; SSE2-NEXT:    [[FLOOR14:%.*]] = call float @llvm.floor.f32(float [[LD14]])
 ; SSE2-NEXT:    [[FLOOR15:%.*]] = call float @llvm.floor.f32(float [[LD15]])
-; SSE2-NEXT:    store float [[FLOOR0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-; SSE2-NEXT:    store float [[FLOOR1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-; SSE2-NEXT:    store float [[FLOOR2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-; SSE2-NEXT:    store float [[FLOOR3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-; SSE2-NEXT:    store float [[FLOOR4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
-; SSE2-NEXT:    store float [[FLOOR5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-; SSE2-NEXT:    store float [[FLOOR6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
-; SSE2-NEXT:    store float [[FLOOR7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
-; SSE2-NEXT:    store float [[FLOOR8]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8), align 4
-; SSE2-NEXT:    store float [[FLOOR9]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9), align 4
-; SSE2-NEXT:    store float [[FLOOR10]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
-; SSE2-NEXT:    store float [[FLOOR11]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-; SSE2-NEXT:    store float [[FLOOR12]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
-; SSE2-NEXT:    store float [[FLOOR13]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-; SSE2-NEXT:    store float [[FLOOR14]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
-; SSE2-NEXT:    store float [[FLOOR15]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+; SSE2-NEXT:    store float [[FLOOR0]], ptr @dst32, align 4
+; SSE2-NEXT:    store float [[FLOOR1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT:    store float [[FLOOR2]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT:    store float [[FLOOR3]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT:    store float [[FLOOR4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+; SSE2-NEXT:    store float [[FLOOR5]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+; SSE2-NEXT:    store float [[FLOOR6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 4
+; SSE2-NEXT:    store float [[FLOOR7]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
+; SSE2-NEXT:    store float [[FLOOR8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
+; SSE2-NEXT:    store float [[FLOOR9]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9), align 4
+; SSE2-NEXT:    store float [[FLOOR10]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 4
+; SSE2-NEXT:    store float [[FLOOR11]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+; SSE2-NEXT:    store float [[FLOOR12]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
+; SSE2-NEXT:    store float [[FLOOR13]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+; SSE2-NEXT:    store float [[FLOOR14]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 4
+; SSE2-NEXT:    store float [[FLOOR15]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @floor_16f32(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP1]])
-; SSE41-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
-; SSE41-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
+; SSE41-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
 ; SSE41-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP3]])
-; SSE41-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
-; SSE41-NEXT:    [[TMP5:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+; SSE41-NEXT:    [[TMP5:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8), align 4
 ; SSE41-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP5]])
-; SSE41-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 4
-; SSE41-NEXT:    [[TMP7:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
+; SSE41-NEXT:    [[TMP7:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 12), align 4
 ; SSE41-NEXT:    [[TMP8:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP7]])
-; SSE41-NEXT:    store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
 ; SSE41-NEXT:    ret void
 ;
 ; AVX1-LABEL: @floor_16f32(
-; AVX1-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX1-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX1-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.floor.v8f32(<8 x float> [[TMP1]])
-; AVX1-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
-; AVX1-NEXT:    [[TMP3:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX1-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 4
+; AVX1-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8), align 4
 ; AVX1-NEXT:    [[TMP4:%.*]] = call <8 x float> @llvm.floor.v8f32(<8 x float> [[TMP3]])
-; AVX1-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX1-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @floor_16f32(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX2-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.floor.v8f32(<8 x float> [[TMP1]])
-; AVX2-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
-; AVX2-NEXT:    [[TMP3:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX2-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 4
+; AVX2-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8), align 4
 ; AVX2-NEXT:    [[TMP4:%.*]] = call <8 x float> @llvm.floor.v8f32(<8 x float> [[TMP3]])
-; AVX2-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX2-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @floor_16f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @src32 to <16 x float>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, ptr @src32, align 4
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <16 x float> @llvm.floor.v16f32(<16 x float> [[TMP1]])
-; AVX512-NEXT:    store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 4
+; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %ld0  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0 ), align 4
-  %ld1  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1 ), align 4
-  %ld2  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2 ), align 4
-  %ld3  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3 ), align 4
-  %ld4  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4 ), align 4
-  %ld5  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5 ), align 4
-  %ld6  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6 ), align 4
-  %ld7  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7 ), align 4
-  %ld8  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8 ), align 4
-  %ld9  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 9 ), align 4
-  %ld10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 10), align 4
-  %ld11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 11), align 4
-  %ld12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12), align 4
-  %ld13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 13), align 4
-  %ld14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 14), align 4
-  %ld15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 15), align 4
+  %ld0  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 0 ), align 4
+  %ld1  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1 ), align 4
+  %ld2  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2 ), align 4
+  %ld3  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3 ), align 4
+  %ld4  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4 ), align 4
+  %ld5  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5 ), align 4
+  %ld6  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6 ), align 4
+  %ld7  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7 ), align 4
+  %ld8  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8 ), align 4
+  %ld9  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 9 ), align 4
+  %ld10 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 10), align 4
+  %ld11 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 11), align 4
+  %ld12 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 12), align 4
+  %ld13 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 13), align 4
+  %ld14 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 14), align 4
+  %ld15 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 15), align 4
   %floor0  = call float @llvm.floor.f32(float %ld0 )
   %floor1  = call float @llvm.floor.f32(float %ld1 )
   %floor2  = call float @llvm.floor.f32(float %ld2 )
@@ -1371,78 +1371,78 @@ define void @floor_16f32() #0 {
   %floor13 = call float @llvm.floor.f32(float %ld13)
   %floor14 = call float @llvm.floor.f32(float %ld14)
   %floor15 = call float @llvm.floor.f32(float %ld15)
-  store float %floor0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 4
-  store float %floor1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
-  store float %floor2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 4
-  store float %floor3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
-  store float %floor4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 4
-  store float %floor5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
-  store float %floor6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 4
-  store float %floor7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
-  store float %floor8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 4
-  store float %floor9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
-  store float %floor10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
-  store float %floor11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-  store float %floor12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
-  store float %floor13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-  store float %floor14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
-  store float %floor15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+  store float %floor0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 4
+  store float %floor1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
+  store float %floor2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 4
+  store float %floor3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
+  store float %floor4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 4
+  store float %floor5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
+  store float %floor6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 4
+  store float %floor7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
+  store float %floor8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 4
+  store float %floor9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
+  store float %floor10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 4
+  store float %floor11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+  store float %floor12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
+  store float %floor13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+  store float %floor14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 4
+  store float %floor15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @nearbyint_4f32() #0 {
 ; SSE2-LABEL: @nearbyint_4f32(
-; SSE2-NEXT:    [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-; SSE2-NEXT:    [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-; SSE2-NEXT:    [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-; SSE2-NEXT:    [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+; SSE2-NEXT:    [[LD0:%.*]] = load float, ptr @src32, align 4
+; SSE2-NEXT:    [[LD1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+; SSE2-NEXT:    [[LD2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+; SSE2-NEXT:    [[LD3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
 ; SSE2-NEXT:    [[NEARBYINT0:%.*]] = call float @llvm.nearbyint.f32(float [[LD0]])
 ; SSE2-NEXT:    [[NEARBYINT1:%.*]] = call float @llvm.nearbyint.f32(float [[LD1]])
 ; SSE2-NEXT:    [[NEARBYINT2:%.*]] = call float @llvm.nearbyint.f32(float [[LD2]])
 ; SSE2-NEXT:    [[NEARBYINT3:%.*]] = call float @llvm.nearbyint.f32(float [[LD3]])
-; SSE2-NEXT:    store float [[NEARBYINT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-; SSE2-NEXT:    store float [[NEARBYINT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-; SSE2-NEXT:    store float [[NEARBYINT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-; SSE2-NEXT:    store float [[NEARBYINT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT:    store float [[NEARBYINT0]], ptr @dst32, align 4
+; SSE2-NEXT:    store float [[NEARBYINT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT:    store float [[NEARBYINT2]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT:    store float [[NEARBYINT3]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @nearbyint_4f32(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP1]])
-; SSE41-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
 ; SSE41-NEXT:    ret void
 ;
 ; AVX-LABEL: @nearbyint_4f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP1]])
-; AVX-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; AVX-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %ld1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %ld2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %ld3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+  %ld0 = load float, ptr @src32, align 4
+  %ld1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %ld3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
   %nearbyint0 = call float @llvm.nearbyint.f32(float %ld0)
   %nearbyint1 = call float @llvm.nearbyint.f32(float %ld1)
   %nearbyint2 = call float @llvm.nearbyint.f32(float %ld2)
   %nearbyint3 = call float @llvm.nearbyint.f32(float %ld3)
-  store float %nearbyint0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-  store float %nearbyint1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %nearbyint2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-  store float %nearbyint3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %nearbyint0, ptr @dst32, align 4
+  store float %nearbyint1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %nearbyint2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+  store float %nearbyint3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @nearbyint_8f32() #0 {
 ; SSE2-LABEL: @nearbyint_8f32(
-; SSE2-NEXT:    [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-; SSE2-NEXT:    [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-; SSE2-NEXT:    [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-; SSE2-NEXT:    [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-; SSE2-NEXT:    [[LD4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-; SSE2-NEXT:    [[LD5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-; SSE2-NEXT:    [[LD6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-; SSE2-NEXT:    [[LD7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+; SSE2-NEXT:    [[LD0:%.*]] = load float, ptr @src32, align 4
+; SSE2-NEXT:    [[LD1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+; SSE2-NEXT:    [[LD2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+; SSE2-NEXT:    [[LD3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+; SSE2-NEXT:    [[LD4:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+; SSE2-NEXT:    [[LD5:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+; SSE2-NEXT:    [[LD6:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+; SSE2-NEXT:    [[LD7:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
 ; SSE2-NEXT:    [[NEARBYINT0:%.*]] = call float @llvm.nearbyint.f32(float [[LD0]])
 ; SSE2-NEXT:    [[NEARBYINT1:%.*]] = call float @llvm.nearbyint.f32(float [[LD1]])
 ; SSE2-NEXT:    [[NEARBYINT2:%.*]] = call float @llvm.nearbyint.f32(float [[LD2]])
@@ -1451,39 +1451,39 @@ define void @nearbyint_8f32() #0 {
 ; SSE2-NEXT:    [[NEARBYINT5:%.*]] = call float @llvm.nearbyint.f32(float [[LD5]])
 ; SSE2-NEXT:    [[NEARBYINT6:%.*]] = call float @llvm.nearbyint.f32(float [[LD6]])
 ; SSE2-NEXT:    [[NEARBYINT7:%.*]] = call float @llvm.nearbyint.f32(float [[LD7]])
-; SSE2-NEXT:    store float [[NEARBYINT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-; SSE2-NEXT:    store float [[NEARBYINT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-; SSE2-NEXT:    store float [[NEARBYINT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-; SSE2-NEXT:    store float [[NEARBYINT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-; SSE2-NEXT:    store float [[NEARBYINT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
-; SSE2-NEXT:    store float [[NEARBYINT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-; SSE2-NEXT:    store float [[NEARBYINT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
-; SSE2-NEXT:    store float [[NEARBYINT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+; SSE2-NEXT:    store float [[NEARBYINT0]], ptr @dst32, align 4
+; SSE2-NEXT:    store float [[NEARBYINT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT:    store float [[NEARBYINT2]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT:    store float [[NEARBYINT3]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT:    store float [[NEARBYINT4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+; SSE2-NEXT:    store float [[NEARBYINT5]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+; SSE2-NEXT:    store float [[NEARBYINT6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 4
+; SSE2-NEXT:    store float [[NEARBYINT7]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @nearbyint_8f32(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP1]])
-; SSE41-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
-; SSE41-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
+; SSE41-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
 ; SSE41-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP3]])
-; SSE41-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
 ; SSE41-NEXT:    ret void
 ;
 ; AVX-LABEL: @nearbyint_8f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> [[TMP1]])
-; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 4
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %ld1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %ld2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %ld3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-  %ld4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-  %ld5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-  %ld6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-  %ld7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+  %ld0 = load float, ptr @src32, align 4
+  %ld1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %ld3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+  %ld4 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+  %ld5 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+  %ld6 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+  %ld7 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
   %nearbyint0 = call float @llvm.nearbyint.f32(float %ld0)
   %nearbyint1 = call float @llvm.nearbyint.f32(float %ld1)
   %nearbyint2 = call float @llvm.nearbyint.f32(float %ld2)
@@ -1492,35 +1492,35 @@ define void @nearbyint_8f32() #0 {
   %nearbyint5 = call float @llvm.nearbyint.f32(float %ld5)
   %nearbyint6 = call float @llvm.nearbyint.f32(float %ld6)
   %nearbyint7 = call float @llvm.nearbyint.f32(float %ld7)
-  store float %nearbyint0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-  store float %nearbyint1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %nearbyint2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-  store float %nearbyint3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-  store float %nearbyint4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
-  store float %nearbyint5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-  store float %nearbyint6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
-  store float %nearbyint7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  store float %nearbyint0, ptr @dst32, align 4
+  store float %nearbyint1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %nearbyint2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+  store float %nearbyint3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+  store float %nearbyint4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+  store float %nearbyint5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+  store float %nearbyint6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 4
+  store float %nearbyint7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @nearbyint_16f32() #0 {
 ; SSE2-LABEL: @nearbyint_16f32(
-; SSE2-NEXT:    [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-; SSE2-NEXT:    [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-; SSE2-NEXT:    [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-; SSE2-NEXT:    [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-; SSE2-NEXT:    [[LD4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-; SSE2-NEXT:    [[LD5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-; SSE2-NEXT:    [[LD6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-; SSE2-NEXT:    [[LD7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
-; SSE2-NEXT:    [[LD8:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8), align 4
-; SSE2-NEXT:    [[LD9:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 9), align 4
-; SSE2-NEXT:    [[LD10:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 10), align 4
-; SSE2-NEXT:    [[LD11:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 11), align 4
-; SSE2-NEXT:    [[LD12:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12), align 4
-; SSE2-NEXT:    [[LD13:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 13), align 4
-; SSE2-NEXT:    [[LD14:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 14), align 4
-; SSE2-NEXT:    [[LD15:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 15), align 4
+; SSE2-NEXT:    [[LD0:%.*]] = load float, ptr @src32, align 4
+; SSE2-NEXT:    [[LD1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+; SSE2-NEXT:    [[LD2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+; SSE2-NEXT:    [[LD3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+; SSE2-NEXT:    [[LD4:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+; SSE2-NEXT:    [[LD5:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+; SSE2-NEXT:    [[LD6:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+; SSE2-NEXT:    [[LD7:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
+; SSE2-NEXT:    [[LD8:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8), align 4
+; SSE2-NEXT:    [[LD9:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 9), align 4
+; SSE2-NEXT:    [[LD10:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 10), align 4
+; SSE2-NEXT:    [[LD11:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 11), align 4
+; SSE2-NEXT:    [[LD12:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 12), align 4
+; SSE2-NEXT:    [[LD13:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 13), align 4
+; SSE2-NEXT:    [[LD14:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 14), align 4
+; SSE2-NEXT:    [[LD15:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 15), align 4
 ; SSE2-NEXT:    [[NEARBYINT0:%.*]] = call float @llvm.nearbyint.f32(float [[LD0]])
 ; SSE2-NEXT:    [[NEARBYINT1:%.*]] = call float @llvm.nearbyint.f32(float [[LD1]])
 ; SSE2-NEXT:    [[NEARBYINT2:%.*]] = call float @llvm.nearbyint.f32(float [[LD2]])
@@ -1537,79 +1537,79 @@ define void @nearbyint_16f32() #0 {
 ; SSE2-NEXT:    [[NEARBYINT13:%.*]] = call float @llvm.nearbyint.f32(float [[LD13]])
 ; SSE2-NEXT:    [[NEARBYINT14:%.*]] = call float @llvm.nearbyint.f32(float [[LD14]])
 ; SSE2-NEXT:    [[NEARBYINT15:%.*]] = call float @llvm.nearbyint.f32(float [[LD15]])
-; SSE2-NEXT:    store float [[NEARBYINT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-; SSE2-NEXT:    store float [[NEARBYINT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-; SSE2-NEXT:    store float [[NEARBYINT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-; SSE2-NEXT:    store float [[NEARBYINT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-; SSE2-NEXT:    store float [[NEARBYINT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
-; SSE2-NEXT:    store float [[NEARBYINT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-; SSE2-NEXT:    store float [[NEARBYINT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
-; SSE2-NEXT:    store float [[NEARBYINT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
-; SSE2-NEXT:    store float [[NEARBYINT8]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8), align 4
-; SSE2-NEXT:    store float [[NEARBYINT9]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9), align 4
-; SSE2-NEXT:    store float [[NEARBYINT10]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
-; SSE2-NEXT:    store float [[NEARBYINT11]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-; SSE2-NEXT:    store float [[NEARBYINT12]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
-; SSE2-NEXT:    store float [[NEARBYINT13]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-; SSE2-NEXT:    store float [[NEARBYINT14]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
-; SSE2-NEXT:    store float [[NEARBYINT15]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+; SSE2-NEXT:    store float [[NEARBYINT0]], ptr @dst32, align 4
+; SSE2-NEXT:    store float [[NEARBYINT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT:    store float [[NEARBYINT2]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT:    store float [[NEARBYINT3]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT:    store float [[NEARBYINT4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+; SSE2-NEXT:    store float [[NEARBYINT5]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+; SSE2-NEXT:    store float [[NEARBYINT6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 4
+; SSE2-NEXT:    store float [[NEARBYINT7]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
+; SSE2-NEXT:    store float [[NEARBYINT8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
+; SSE2-NEXT:    store float [[NEARBYINT9]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9), align 4
+; SSE2-NEXT:    store float [[NEARBYINT10]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 4
+; SSE2-NEXT:    store float [[NEARBYINT11]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+; SSE2-NEXT:    store float [[NEARBYINT12]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
+; SSE2-NEXT:    store float [[NEARBYINT13]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+; SSE2-NEXT:    store float [[NEARBYINT14]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 4
+; SSE2-NEXT:    store float [[NEARBYINT15]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @nearbyint_16f32(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP1]])
-; SSE41-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
-; SSE41-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
+; SSE41-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
 ; SSE41-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP3]])
-; SSE41-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
-; SSE41-NEXT:    [[TMP5:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+; SSE41-NEXT:    [[TMP5:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8), align 4
 ; SSE41-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP5]])
-; SSE41-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 4
-; SSE41-NEXT:    [[TMP7:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
+; SSE41-NEXT:    [[TMP7:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 12), align 4
 ; SSE41-NEXT:    [[TMP8:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP7]])
-; SSE41-NEXT:    store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
 ; SSE41-NEXT:    ret void
 ;
 ; AVX1-LABEL: @nearbyint_16f32(
-; AVX1-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX1-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX1-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> [[TMP1]])
-; AVX1-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
-; AVX1-NEXT:    [[TMP3:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX1-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 4
+; AVX1-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8), align 4
 ; AVX1-NEXT:    [[TMP4:%.*]] = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> [[TMP3]])
-; AVX1-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX1-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @nearbyint_16f32(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX2-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> [[TMP1]])
-; AVX2-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
-; AVX2-NEXT:    [[TMP3:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX2-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 4
+; AVX2-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8), align 4
 ; AVX2-NEXT:    [[TMP4:%.*]] = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> [[TMP3]])
-; AVX2-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX2-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @nearbyint_16f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @src32 to <16 x float>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, ptr @src32, align 4
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> [[TMP1]])
-; AVX512-NEXT:    store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 4
+; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %ld0  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0 ), align 4
-  %ld1  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1 ), align 4
-  %ld2  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2 ), align 4
-  %ld3  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3 ), align 4
-  %ld4  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4 ), align 4
-  %ld5  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5 ), align 4
-  %ld6  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6 ), align 4
-  %ld7  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7 ), align 4
-  %ld8  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8 ), align 4
-  %ld9  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 9 ), align 4
-  %ld10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 10), align 4
-  %ld11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 11), align 4
-  %ld12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12), align 4
-  %ld13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 13), align 4
-  %ld14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 14), align 4
-  %ld15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 15), align 4
+  %ld0  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 0 ), align 4
+  %ld1  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1 ), align 4
+  %ld2  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2 ), align 4
+  %ld3  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3 ), align 4
+  %ld4  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4 ), align 4
+  %ld5  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5 ), align 4
+  %ld6  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6 ), align 4
+  %ld7  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7 ), align 4
+  %ld8  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8 ), align 4
+  %ld9  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 9 ), align 4
+  %ld10 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 10), align 4
+  %ld11 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 11), align 4
+  %ld12 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 12), align 4
+  %ld13 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 13), align 4
+  %ld14 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 14), align 4
+  %ld15 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 15), align 4
   %nearbyint0  = call float @llvm.nearbyint.f32(float %ld0 )
   %nearbyint1  = call float @llvm.nearbyint.f32(float %ld1 )
   %nearbyint2  = call float @llvm.nearbyint.f32(float %ld2 )
@@ -1626,78 +1626,78 @@ define void @nearbyint_16f32() #0 {
   %nearbyint13 = call float @llvm.nearbyint.f32(float %ld13)
   %nearbyint14 = call float @llvm.nearbyint.f32(float %ld14)
   %nearbyint15 = call float @llvm.nearbyint.f32(float %ld15)
-  store float %nearbyint0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 4
-  store float %nearbyint1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
-  store float %nearbyint2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 4
-  store float %nearbyint3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
-  store float %nearbyint4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 4
-  store float %nearbyint5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
-  store float %nearbyint6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 4
-  store float %nearbyint7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
-  store float %nearbyint8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 4
-  store float %nearbyint9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
-  store float %nearbyint10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
-  store float %nearbyint11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-  store float %nearbyint12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
-  store float %nearbyint13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-  store float %nearbyint14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
-  store float %nearbyint15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+  store float %nearbyint0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 4
+  store float %nearbyint1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
+  store float %nearbyint2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 4
+  store float %nearbyint3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
+  store float %nearbyint4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 4
+  store float %nearbyint5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
+  store float %nearbyint6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 4
+  store float %nearbyint7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
+  store float %nearbyint8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 4
+  store float %nearbyint9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
+  store float %nearbyint10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 4
+  store float %nearbyint11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+  store float %nearbyint12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
+  store float %nearbyint13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+  store float %nearbyint14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 4
+  store float %nearbyint15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
   ret void
 }
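; A hedged note on an apparent inconsistency in @nearbyint_16f32 above: the
; converted input IR keeps the explicit all-zero GEP for element 0
; ("ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 0 )"),
; while the regenerated SSE2 CHECK line expects plain "ptr @src32". Both
; forms are consistent if, as I believe, LLVM constant-folds an
; all-zero-index GEP constant expression to its base pointer when the test
; file is parsed.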
 
 define void @rint_4f32() #0 {
 ; SSE2-LABEL: @rint_4f32(
-; SSE2-NEXT:    [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-; SSE2-NEXT:    [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-; SSE2-NEXT:    [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-; SSE2-NEXT:    [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+; SSE2-NEXT:    [[LD0:%.*]] = load float, ptr @src32, align 4
+; SSE2-NEXT:    [[LD1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+; SSE2-NEXT:    [[LD2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+; SSE2-NEXT:    [[LD3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
 ; SSE2-NEXT:    [[RINT0:%.*]] = call float @llvm.rint.f32(float [[LD0]])
 ; SSE2-NEXT:    [[RINT1:%.*]] = call float @llvm.rint.f32(float [[LD1]])
 ; SSE2-NEXT:    [[RINT2:%.*]] = call float @llvm.rint.f32(float [[LD2]])
 ; SSE2-NEXT:    [[RINT3:%.*]] = call float @llvm.rint.f32(float [[LD3]])
-; SSE2-NEXT:    store float [[RINT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-; SSE2-NEXT:    store float [[RINT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-; SSE2-NEXT:    store float [[RINT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-; SSE2-NEXT:    store float [[RINT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT:    store float [[RINT0]], ptr @dst32, align 4
+; SSE2-NEXT:    store float [[RINT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT:    store float [[RINT2]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT:    store float [[RINT3]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @rint_4f32(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[TMP1]])
-; SSE41-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
 ; SSE41-NEXT:    ret void
 ;
 ; AVX-LABEL: @rint_4f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[TMP1]])
-; AVX-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; AVX-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %ld1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %ld2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %ld3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+  %ld0 = load float, ptr @src32, align 4
+  %ld1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %ld3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
   %rint0 = call float @llvm.rint.f32(float %ld0)
   %rint1 = call float @llvm.rint.f32(float %ld1)
   %rint2 = call float @llvm.rint.f32(float %ld2)
   %rint3 = call float @llvm.rint.f32(float %ld3)
-  store float %rint0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-  store float %rint1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %rint2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-  store float %rint3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %rint0, ptr @dst32, align 4
+  store float %rint1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %rint2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+  store float %rint3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @rint_8f32() #0 {
 ; SSE2-LABEL: @rint_8f32(
-; SSE2-NEXT:    [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-; SSE2-NEXT:    [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-; SSE2-NEXT:    [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-; SSE2-NEXT:    [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-; SSE2-NEXT:    [[LD4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-; SSE2-NEXT:    [[LD5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-; SSE2-NEXT:    [[LD6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-; SSE2-NEXT:    [[LD7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+; SSE2-NEXT:    [[LD0:%.*]] = load float, ptr @src32, align 4
+; SSE2-NEXT:    [[LD1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+; SSE2-NEXT:    [[LD2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+; SSE2-NEXT:    [[LD3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+; SSE2-NEXT:    [[LD4:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+; SSE2-NEXT:    [[LD5:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+; SSE2-NEXT:    [[LD6:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+; SSE2-NEXT:    [[LD7:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
 ; SSE2-NEXT:    [[RINT0:%.*]] = call float @llvm.rint.f32(float [[LD0]])
 ; SSE2-NEXT:    [[RINT1:%.*]] = call float @llvm.rint.f32(float [[LD1]])
 ; SSE2-NEXT:    [[RINT2:%.*]] = call float @llvm.rint.f32(float [[LD2]])
@@ -1706,39 +1706,39 @@ define void @rint_8f32() #0 {
 ; SSE2-NEXT:    [[RINT5:%.*]] = call float @llvm.rint.f32(float [[LD5]])
 ; SSE2-NEXT:    [[RINT6:%.*]] = call float @llvm.rint.f32(float [[LD6]])
 ; SSE2-NEXT:    [[RINT7:%.*]] = call float @llvm.rint.f32(float [[LD7]])
-; SSE2-NEXT:    store float [[RINT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-; SSE2-NEXT:    store float [[RINT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-; SSE2-NEXT:    store float [[RINT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-; SSE2-NEXT:    store float [[RINT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-; SSE2-NEXT:    store float [[RINT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
-; SSE2-NEXT:    store float [[RINT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-; SSE2-NEXT:    store float [[RINT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
-; SSE2-NEXT:    store float [[RINT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+; SSE2-NEXT:    store float [[RINT0]], ptr @dst32, align 4
+; SSE2-NEXT:    store float [[RINT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT:    store float [[RINT2]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT:    store float [[RINT3]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT:    store float [[RINT4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+; SSE2-NEXT:    store float [[RINT5]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+; SSE2-NEXT:    store float [[RINT6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 4
+; SSE2-NEXT:    store float [[RINT7]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @rint_8f32(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[TMP1]])
-; SSE41-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
-; SSE41-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
+; SSE41-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
 ; SSE41-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[TMP3]])
-; SSE41-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
 ; SSE41-NEXT:    ret void
 ;
 ; AVX-LABEL: @rint_8f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.rint.v8f32(<8 x float> [[TMP1]])
-; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 4
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %ld1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %ld2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %ld3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-  %ld4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-  %ld5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-  %ld6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-  %ld7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+  %ld0 = load float, ptr @src32, align 4
+  %ld1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %ld3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+  %ld4 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+  %ld5 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+  %ld6 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+  %ld7 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
   %rint0 = call float @llvm.rint.f32(float %ld0)
   %rint1 = call float @llvm.rint.f32(float %ld1)
   %rint2 = call float @llvm.rint.f32(float %ld2)
@@ -1747,35 +1747,35 @@ define void @rint_8f32() #0 {
   %rint5 = call float @llvm.rint.f32(float %ld5)
   %rint6 = call float @llvm.rint.f32(float %ld6)
   %rint7 = call float @llvm.rint.f32(float %ld7)
-  store float %rint0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-  store float %rint1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %rint2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-  store float %rint3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-  store float %rint4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
-  store float %rint5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-  store float %rint6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
-  store float %rint7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  store float %rint0, ptr @dst32, align 4
+  store float %rint1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %rint2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+  store float %rint3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+  store float %rint4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+  store float %rint5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+  store float %rint6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 4
+  store float %rint7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @rint_16f32() #0 {
 ; SSE2-LABEL: @rint_16f32(
-; SSE2-NEXT:    [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-; SSE2-NEXT:    [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-; SSE2-NEXT:    [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-; SSE2-NEXT:    [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-; SSE2-NEXT:    [[LD4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-; SSE2-NEXT:    [[LD5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-; SSE2-NEXT:    [[LD6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-; SSE2-NEXT:    [[LD7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
-; SSE2-NEXT:    [[LD8:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8), align 4
-; SSE2-NEXT:    [[LD9:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 9), align 4
-; SSE2-NEXT:    [[LD10:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 10), align 4
-; SSE2-NEXT:    [[LD11:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 11), align 4
-; SSE2-NEXT:    [[LD12:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12), align 4
-; SSE2-NEXT:    [[LD13:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 13), align 4
-; SSE2-NEXT:    [[LD14:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 14), align 4
-; SSE2-NEXT:    [[LD15:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 15), align 4
+; SSE2-NEXT:    [[LD0:%.*]] = load float, ptr @src32, align 4
+; SSE2-NEXT:    [[LD1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+; SSE2-NEXT:    [[LD2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+; SSE2-NEXT:    [[LD3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+; SSE2-NEXT:    [[LD4:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+; SSE2-NEXT:    [[LD5:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+; SSE2-NEXT:    [[LD6:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+; SSE2-NEXT:    [[LD7:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
+; SSE2-NEXT:    [[LD8:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8), align 4
+; SSE2-NEXT:    [[LD9:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 9), align 4
+; SSE2-NEXT:    [[LD10:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 10), align 4
+; SSE2-NEXT:    [[LD11:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 11), align 4
+; SSE2-NEXT:    [[LD12:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 12), align 4
+; SSE2-NEXT:    [[LD13:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 13), align 4
+; SSE2-NEXT:    [[LD14:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 14), align 4
+; SSE2-NEXT:    [[LD15:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 15), align 4
 ; SSE2-NEXT:    [[RINT0:%.*]] = call float @llvm.rint.f32(float [[LD0]])
 ; SSE2-NEXT:    [[RINT1:%.*]] = call float @llvm.rint.f32(float [[LD1]])
 ; SSE2-NEXT:    [[RINT2:%.*]] = call float @llvm.rint.f32(float [[LD2]])
@@ -1792,79 +1792,79 @@ define void @rint_16f32() #0 {
 ; SSE2-NEXT:    [[RINT13:%.*]] = call float @llvm.rint.f32(float [[LD13]])
 ; SSE2-NEXT:    [[RINT14:%.*]] = call float @llvm.rint.f32(float [[LD14]])
 ; SSE2-NEXT:    [[RINT15:%.*]] = call float @llvm.rint.f32(float [[LD15]])
-; SSE2-NEXT:    store float [[RINT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-; SSE2-NEXT:    store float [[RINT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-; SSE2-NEXT:    store float [[RINT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-; SSE2-NEXT:    store float [[RINT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-; SSE2-NEXT:    store float [[RINT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
-; SSE2-NEXT:    store float [[RINT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-; SSE2-NEXT:    store float [[RINT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
-; SSE2-NEXT:    store float [[RINT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
-; SSE2-NEXT:    store float [[RINT8]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8), align 4
-; SSE2-NEXT:    store float [[RINT9]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9), align 4
-; SSE2-NEXT:    store float [[RINT10]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
-; SSE2-NEXT:    store float [[RINT11]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-; SSE2-NEXT:    store float [[RINT12]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
-; SSE2-NEXT:    store float [[RINT13]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-; SSE2-NEXT:    store float [[RINT14]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
-; SSE2-NEXT:    store float [[RINT15]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+; SSE2-NEXT:    store float [[RINT0]], ptr @dst32, align 4
+; SSE2-NEXT:    store float [[RINT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT:    store float [[RINT2]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT:    store float [[RINT3]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT:    store float [[RINT4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+; SSE2-NEXT:    store float [[RINT5]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+; SSE2-NEXT:    store float [[RINT6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 4
+; SSE2-NEXT:    store float [[RINT7]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
+; SSE2-NEXT:    store float [[RINT8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
+; SSE2-NEXT:    store float [[RINT9]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9), align 4
+; SSE2-NEXT:    store float [[RINT10]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 4
+; SSE2-NEXT:    store float [[RINT11]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+; SSE2-NEXT:    store float [[RINT12]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
+; SSE2-NEXT:    store float [[RINT13]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+; SSE2-NEXT:    store float [[RINT14]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 4
+; SSE2-NEXT:    store float [[RINT15]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @rint_16f32(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[TMP1]])
-; SSE41-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
-; SSE41-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
+; SSE41-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
 ; SSE41-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[TMP3]])
-; SSE41-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
-; SSE41-NEXT:    [[TMP5:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+; SSE41-NEXT:    [[TMP5:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8), align 4
 ; SSE41-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[TMP5]])
-; SSE41-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 4
-; SSE41-NEXT:    [[TMP7:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
+; SSE41-NEXT:    [[TMP7:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 12), align 4
 ; SSE41-NEXT:    [[TMP8:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[TMP7]])
-; SSE41-NEXT:    store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
 ; SSE41-NEXT:    ret void
 ;
 ; AVX1-LABEL: @rint_16f32(
-; AVX1-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX1-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX1-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.rint.v8f32(<8 x float> [[TMP1]])
-; AVX1-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
-; AVX1-NEXT:    [[TMP3:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX1-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 4
+; AVX1-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8), align 4
 ; AVX1-NEXT:    [[TMP4:%.*]] = call <8 x float> @llvm.rint.v8f32(<8 x float> [[TMP3]])
-; AVX1-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX1-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @rint_16f32(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX2-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.rint.v8f32(<8 x float> [[TMP1]])
-; AVX2-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
-; AVX2-NEXT:    [[TMP3:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX2-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 4
+; AVX2-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8), align 4
 ; AVX2-NEXT:    [[TMP4:%.*]] = call <8 x float> @llvm.rint.v8f32(<8 x float> [[TMP3]])
-; AVX2-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX2-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @rint_16f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @src32 to <16 x float>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, ptr @src32, align 4
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <16 x float> @llvm.rint.v16f32(<16 x float> [[TMP1]])
-; AVX512-NEXT:    store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 4
+; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %ld0  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0 ), align 4
-  %ld1  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1 ), align 4
-  %ld2  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2 ), align 4
-  %ld3  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3 ), align 4
-  %ld4  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4 ), align 4
-  %ld5  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5 ), align 4
-  %ld6  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6 ), align 4
-  %ld7  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7 ), align 4
-  %ld8  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8 ), align 4
-  %ld9  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 9 ), align 4
-  %ld10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 10), align 4
-  %ld11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 11), align 4
-  %ld12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12), align 4
-  %ld13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 13), align 4
-  %ld14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 14), align 4
-  %ld15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 15), align 4
+  %ld0  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 0 ), align 4
+  %ld1  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1 ), align 4
+  %ld2  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2 ), align 4
+  %ld3  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3 ), align 4
+  %ld4  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4 ), align 4
+  %ld5  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5 ), align 4
+  %ld6  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6 ), align 4
+  %ld7  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7 ), align 4
+  %ld8  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8 ), align 4
+  %ld9  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 9 ), align 4
+  %ld10 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 10), align 4
+  %ld11 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 11), align 4
+  %ld12 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 12), align 4
+  %ld13 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 13), align 4
+  %ld14 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 14), align 4
+  %ld15 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 15), align 4
   %rint0  = call float @llvm.rint.f32(float %ld0 )
   %rint1  = call float @llvm.rint.f32(float %ld1 )
   %rint2  = call float @llvm.rint.f32(float %ld2 )
@@ -1881,78 +1881,78 @@ define void @rint_16f32() #0 {
   %rint13 = call float @llvm.rint.f32(float %ld13)
   %rint14 = call float @llvm.rint.f32(float %ld14)
   %rint15 = call float @llvm.rint.f32(float %ld15)
-  store float %rint0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 4
-  store float %rint1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
-  store float %rint2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 4
-  store float %rint3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
-  store float %rint4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 4
-  store float %rint5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
-  store float %rint6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 4
-  store float %rint7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
-  store float %rint8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 4
-  store float %rint9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
-  store float %rint10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
-  store float %rint11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-  store float %rint12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
-  store float %rint13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-  store float %rint14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
-  store float %rint15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+  store float %rint0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 4
+  store float %rint1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
+  store float %rint2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 4
+  store float %rint3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
+  store float %rint4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 4
+  store float %rint5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
+  store float %rint6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 4
+  store float %rint7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
+  store float %rint8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 4
+  store float %rint9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
+  store float %rint10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 4
+  store float %rint11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+  store float %rint12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
+  store float %rint13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+  store float %rint14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 4
+  store float %rint15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @trunc_4f32() #0 {
 ; SSE2-LABEL: @trunc_4f32(
-; SSE2-NEXT:    [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-; SSE2-NEXT:    [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-; SSE2-NEXT:    [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-; SSE2-NEXT:    [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+; SSE2-NEXT:    [[LD0:%.*]] = load float, ptr @src32, align 4
+; SSE2-NEXT:    [[LD1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+; SSE2-NEXT:    [[LD2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+; SSE2-NEXT:    [[LD3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
 ; SSE2-NEXT:    [[TRUNC0:%.*]] = call float @llvm.trunc.f32(float [[LD0]])
 ; SSE2-NEXT:    [[TRUNC1:%.*]] = call float @llvm.trunc.f32(float [[LD1]])
 ; SSE2-NEXT:    [[TRUNC2:%.*]] = call float @llvm.trunc.f32(float [[LD2]])
 ; SSE2-NEXT:    [[TRUNC3:%.*]] = call float @llvm.trunc.f32(float [[LD3]])
-; SSE2-NEXT:    store float [[TRUNC0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-; SSE2-NEXT:    store float [[TRUNC1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-; SSE2-NEXT:    store float [[TRUNC2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-; SSE2-NEXT:    store float [[TRUNC3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT:    store float [[TRUNC0]], ptr @dst32, align 4
+; SSE2-NEXT:    store float [[TRUNC1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT:    store float [[TRUNC2]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT:    store float [[TRUNC3]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @trunc_4f32(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP1]])
-; SSE41-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
 ; SSE41-NEXT:    ret void
 ;
 ; AVX-LABEL: @trunc_4f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP1]])
-; AVX-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; AVX-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %ld1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %ld2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %ld3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+  %ld0 = load float, ptr @src32, align 4
+  %ld1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %ld3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
   %trunc0 = call float @llvm.trunc.f32(float %ld0)
   %trunc1 = call float @llvm.trunc.f32(float %ld1)
   %trunc2 = call float @llvm.trunc.f32(float %ld2)
   %trunc3 = call float @llvm.trunc.f32(float %ld3)
-  store float %trunc0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-  store float %trunc1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %trunc2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-  store float %trunc3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %trunc0, ptr @dst32, align 4
+  store float %trunc1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %trunc2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+  store float %trunc3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @trunc_8f32() #0 {
 ; SSE2-LABEL: @trunc_8f32(
-; SSE2-NEXT:    [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-; SSE2-NEXT:    [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-; SSE2-NEXT:    [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-; SSE2-NEXT:    [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-; SSE2-NEXT:    [[LD4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-; SSE2-NEXT:    [[LD5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-; SSE2-NEXT:    [[LD6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-; SSE2-NEXT:    [[LD7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+; SSE2-NEXT:    [[LD0:%.*]] = load float, ptr @src32, align 4
+; SSE2-NEXT:    [[LD1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+; SSE2-NEXT:    [[LD2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+; SSE2-NEXT:    [[LD3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+; SSE2-NEXT:    [[LD4:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+; SSE2-NEXT:    [[LD5:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+; SSE2-NEXT:    [[LD6:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+; SSE2-NEXT:    [[LD7:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
 ; SSE2-NEXT:    [[TRUNC0:%.*]] = call float @llvm.trunc.f32(float [[LD0]])
 ; SSE2-NEXT:    [[TRUNC1:%.*]] = call float @llvm.trunc.f32(float [[LD1]])
 ; SSE2-NEXT:    [[TRUNC2:%.*]] = call float @llvm.trunc.f32(float [[LD2]])
@@ -1961,39 +1961,39 @@ define void @trunc_8f32() #0 {
 ; SSE2-NEXT:    [[TRUNC5:%.*]] = call float @llvm.trunc.f32(float [[LD5]])
 ; SSE2-NEXT:    [[TRUNC6:%.*]] = call float @llvm.trunc.f32(float [[LD6]])
 ; SSE2-NEXT:    [[TRUNC7:%.*]] = call float @llvm.trunc.f32(float [[LD7]])
-; SSE2-NEXT:    store float [[TRUNC0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-; SSE2-NEXT:    store float [[TRUNC1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-; SSE2-NEXT:    store float [[TRUNC2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-; SSE2-NEXT:    store float [[TRUNC3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-; SSE2-NEXT:    store float [[TRUNC4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
-; SSE2-NEXT:    store float [[TRUNC5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-; SSE2-NEXT:    store float [[TRUNC6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
-; SSE2-NEXT:    store float [[TRUNC7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+; SSE2-NEXT:    store float [[TRUNC0]], ptr @dst32, align 4
+; SSE2-NEXT:    store float [[TRUNC1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT:    store float [[TRUNC2]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT:    store float [[TRUNC3]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT:    store float [[TRUNC4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+; SSE2-NEXT:    store float [[TRUNC5]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+; SSE2-NEXT:    store float [[TRUNC6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 4
+; SSE2-NEXT:    store float [[TRUNC7]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @trunc_8f32(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP1]])
-; SSE41-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
-; SSE41-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
+; SSE41-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
 ; SSE41-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP3]])
-; SSE41-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
 ; SSE41-NEXT:    ret void
 ;
 ; AVX-LABEL: @trunc_8f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.trunc.v8f32(<8 x float> [[TMP1]])
-; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 4
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %ld1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %ld2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %ld3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-  %ld4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-  %ld5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-  %ld6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-  %ld7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+  %ld0 = load float, ptr @src32, align 4
+  %ld1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %ld3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+  %ld4 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+  %ld5 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+  %ld6 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+  %ld7 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
   %trunc0 = call float @llvm.trunc.f32(float %ld0)
   %trunc1 = call float @llvm.trunc.f32(float %ld1)
   %trunc2 = call float @llvm.trunc.f32(float %ld2)
@@ -2002,35 +2002,35 @@ define void @trunc_8f32() #0 {
   %trunc5 = call float @llvm.trunc.f32(float %ld5)
   %trunc6 = call float @llvm.trunc.f32(float %ld6)
   %trunc7 = call float @llvm.trunc.f32(float %ld7)
-  store float %trunc0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-  store float %trunc1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %trunc2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-  store float %trunc3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-  store float %trunc4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
-  store float %trunc5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-  store float %trunc6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
-  store float %trunc7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  store float %trunc0, ptr @dst32, align 4
+  store float %trunc1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %trunc2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+  store float %trunc3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+  store float %trunc4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+  store float %trunc5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+  store float %trunc6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 4
+  store float %trunc7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @trunc_16f32() #0 {
 ; SSE2-LABEL: @trunc_16f32(
-; SSE2-NEXT:    [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-; SSE2-NEXT:    [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-; SSE2-NEXT:    [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-; SSE2-NEXT:    [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-; SSE2-NEXT:    [[LD4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-; SSE2-NEXT:    [[LD5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-; SSE2-NEXT:    [[LD6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-; SSE2-NEXT:    [[LD7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
-; SSE2-NEXT:    [[LD8:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8), align 4
-; SSE2-NEXT:    [[LD9:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 9), align 4
-; SSE2-NEXT:    [[LD10:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 10), align 4
-; SSE2-NEXT:    [[LD11:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 11), align 4
-; SSE2-NEXT:    [[LD12:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12), align 4
-; SSE2-NEXT:    [[LD13:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 13), align 4
-; SSE2-NEXT:    [[LD14:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 14), align 4
-; SSE2-NEXT:    [[LD15:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 15), align 4
+; SSE2-NEXT:    [[LD0:%.*]] = load float, ptr @src32, align 4
+; SSE2-NEXT:    [[LD1:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+; SSE2-NEXT:    [[LD2:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+; SSE2-NEXT:    [[LD3:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+; SSE2-NEXT:    [[LD4:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+; SSE2-NEXT:    [[LD5:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+; SSE2-NEXT:    [[LD6:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+; SSE2-NEXT:    [[LD7:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
+; SSE2-NEXT:    [[LD8:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8), align 4
+; SSE2-NEXT:    [[LD9:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 9), align 4
+; SSE2-NEXT:    [[LD10:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 10), align 4
+; SSE2-NEXT:    [[LD11:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 11), align 4
+; SSE2-NEXT:    [[LD12:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 12), align 4
+; SSE2-NEXT:    [[LD13:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 13), align 4
+; SSE2-NEXT:    [[LD14:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 14), align 4
+; SSE2-NEXT:    [[LD15:%.*]] = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 15), align 4
 ; SSE2-NEXT:    [[TRUNC0:%.*]] = call float @llvm.trunc.f32(float [[LD0]])
 ; SSE2-NEXT:    [[TRUNC1:%.*]] = call float @llvm.trunc.f32(float [[LD1]])
 ; SSE2-NEXT:    [[TRUNC2:%.*]] = call float @llvm.trunc.f32(float [[LD2]])
@@ -2047,79 +2047,79 @@ define void @trunc_16f32() #0 {
 ; SSE2-NEXT:    [[TRUNC13:%.*]] = call float @llvm.trunc.f32(float [[LD13]])
 ; SSE2-NEXT:    [[TRUNC14:%.*]] = call float @llvm.trunc.f32(float [[LD14]])
 ; SSE2-NEXT:    [[TRUNC15:%.*]] = call float @llvm.trunc.f32(float [[LD15]])
-; SSE2-NEXT:    store float [[TRUNC0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-; SSE2-NEXT:    store float [[TRUNC1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-; SSE2-NEXT:    store float [[TRUNC2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-; SSE2-NEXT:    store float [[TRUNC3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-; SSE2-NEXT:    store float [[TRUNC4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
-; SSE2-NEXT:    store float [[TRUNC5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-; SSE2-NEXT:    store float [[TRUNC6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
-; SSE2-NEXT:    store float [[TRUNC7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
-; SSE2-NEXT:    store float [[TRUNC8]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8), align 4
-; SSE2-NEXT:    store float [[TRUNC9]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9), align 4
-; SSE2-NEXT:    store float [[TRUNC10]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
-; SSE2-NEXT:    store float [[TRUNC11]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-; SSE2-NEXT:    store float [[TRUNC12]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
-; SSE2-NEXT:    store float [[TRUNC13]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-; SSE2-NEXT:    store float [[TRUNC14]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
-; SSE2-NEXT:    store float [[TRUNC15]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+; SSE2-NEXT:    store float [[TRUNC0]], ptr @dst32, align 4
+; SSE2-NEXT:    store float [[TRUNC1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT:    store float [[TRUNC2]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT:    store float [[TRUNC3]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT:    store float [[TRUNC4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+; SSE2-NEXT:    store float [[TRUNC5]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+; SSE2-NEXT:    store float [[TRUNC6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 4
+; SSE2-NEXT:    store float [[TRUNC7]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
+; SSE2-NEXT:    store float [[TRUNC8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
+; SSE2-NEXT:    store float [[TRUNC9]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9), align 4
+; SSE2-NEXT:    store float [[TRUNC10]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 4
+; SSE2-NEXT:    store float [[TRUNC11]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+; SSE2-NEXT:    store float [[TRUNC12]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
+; SSE2-NEXT:    store float [[TRUNC13]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+; SSE2-NEXT:    store float [[TRUNC14]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 4
+; SSE2-NEXT:    store float [[TRUNC15]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
 ; SSE2-NEXT:    ret void
 ;
 ; SSE41-LABEL: @trunc_16f32(
-; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; SSE41-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP1]])
-; SSE41-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
-; SSE41-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
+; SSE41-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
 ; SSE41-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP3]])
-; SSE41-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
-; SSE41-NEXT:    [[TMP5:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+; SSE41-NEXT:    [[TMP5:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8), align 4
 ; SSE41-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP5]])
-; SSE41-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 4
-; SSE41-NEXT:    [[TMP7:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
+; SSE41-NEXT:    [[TMP7:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 12), align 4
 ; SSE41-NEXT:    [[TMP8:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP7]])
-; SSE41-NEXT:    store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
 ; SSE41-NEXT:    ret void
 ;
 ; AVX1-LABEL: @trunc_16f32(
-; AVX1-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX1-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX1-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.trunc.v8f32(<8 x float> [[TMP1]])
-; AVX1-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
-; AVX1-NEXT:    [[TMP3:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX1-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 4
+; AVX1-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8), align 4
 ; AVX1-NEXT:    [[TMP4:%.*]] = call <8 x float> @llvm.trunc.v8f32(<8 x float> [[TMP3]])
-; AVX1-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX1-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @trunc_16f32(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX2-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.trunc.v8f32(<8 x float> [[TMP1]])
-; AVX2-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
-; AVX2-NEXT:    [[TMP3:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX2-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 4
+; AVX2-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8), align 4
 ; AVX2-NEXT:    [[TMP4:%.*]] = call <8 x float> @llvm.trunc.v8f32(<8 x float> [[TMP3]])
-; AVX2-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX2-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @trunc_16f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @src32 to <16 x float>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, ptr @src32, align 4
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <16 x float> @llvm.trunc.v16f32(<16 x float> [[TMP1]])
-; AVX512-NEXT:    store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 4
+; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %ld0  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0 ), align 4
-  %ld1  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1 ), align 4
-  %ld2  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2 ), align 4
-  %ld3  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3 ), align 4
-  %ld4  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4 ), align 4
-  %ld5  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5 ), align 4
-  %ld6  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6 ), align 4
-  %ld7  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7 ), align 4
-  %ld8  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8 ), align 4
-  %ld9  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 9 ), align 4
-  %ld10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 10), align 4
-  %ld11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 11), align 4
-  %ld12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12), align 4
-  %ld13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 13), align 4
-  %ld14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 14), align 4
-  %ld15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 15), align 4
+  %ld0  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 0 ), align 4
+  %ld1  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1 ), align 4
+  %ld2  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2 ), align 4
+  %ld3  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3 ), align 4
+  %ld4  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4 ), align 4
+  %ld5  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5 ), align 4
+  %ld6  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6 ), align 4
+  %ld7  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7 ), align 4
+  %ld8  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8 ), align 4
+  %ld9  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 9 ), align 4
+  %ld10 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 10), align 4
+  %ld11 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 11), align 4
+  %ld12 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 12), align 4
+  %ld13 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 13), align 4
+  %ld14 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 14), align 4
+  %ld15 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 15), align 4
   %trunc0  = call float @llvm.trunc.f32(float %ld0 )
   %trunc1  = call float @llvm.trunc.f32(float %ld1 )
   %trunc2  = call float @llvm.trunc.f32(float %ld2 )
@@ -2136,22 +2136,22 @@ define void @trunc_16f32() #0 {
   %trunc13 = call float @llvm.trunc.f32(float %ld13)
   %trunc14 = call float @llvm.trunc.f32(float %ld14)
   %trunc15 = call float @llvm.trunc.f32(float %ld15)
-  store float %trunc0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 4
-  store float %trunc1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
-  store float %trunc2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 4
-  store float %trunc3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
-  store float %trunc4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 4
-  store float %trunc5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
-  store float %trunc6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 4
-  store float %trunc7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
-  store float %trunc8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 4
-  store float %trunc9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
-  store float %trunc10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
-  store float %trunc11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-  store float %trunc12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
-  store float %trunc13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-  store float %trunc14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
-  store float %trunc15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+  store float %trunc0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 4
+  store float %trunc1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
+  store float %trunc2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 4
+  store float %trunc3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
+  store float %trunc4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 4
+  store float %trunc5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
+  store float %trunc6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 4
+  store float %trunc7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
+  store float %trunc8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 4
+  store float %trunc9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
+  store float %trunc10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 4
+  store float %trunc11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+  store float %trunc12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
+  store float %trunc13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+  store float %trunc14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 4
+  store float %trunc15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
   ret void
 }
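
All of the rint/trunc hunks above are the same mechanical rewrite: with typed
pointers, the vectorized checks had to spell each access as a constant bitcast
of the [16 x float] global to a vector pointer type, whereas with opaque
pointers the cast disappears and zero-offset GEP constants fold away. As a
minimal before/after sketch (illustrative IR using a hypothetical global @g,
not copied from any test above):

  ; typed pointers (before): the global must be cast to the vector pointer type
  %v = load <4 x float>, <4 x float>* bitcast ([16 x float]* @g to <4 x float>*), align 4
  ; opaque pointers (after): ptr carries no pointee type, so @g is used directly
  %v = load <4 x float>, ptr @g, align 4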
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/funclet.ll b/llvm/test/Transforms/SLPVectorizer/X86/funclet.ll
index 83478f5d9d42c..578d463a9489d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/funclet.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/funclet.ll
@@ -3,23 +3,20 @@
 target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
 target triple = "i686-pc-windows-msvc18.0.0"
 
-define void @test1(double* %a, double* %b, double* %c) #0 personality i32 (...)* @__CxxFrameHandler3 {
+define void @test1(ptr %a, ptr %b, ptr %c) #0 personality ptr @__CxxFrameHandler3 {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    invoke void @_CxxThrowException(i8* null, i8* null)
+; CHECK-NEXT:    invoke void @_CxxThrowException(ptr null, ptr null)
 ; CHECK-NEXT:    to label [[UNREACHABLE:%.*]] unwind label [[CATCH_DISPATCH:%.*]]
 ; CHECK:       catch.dispatch:
 ; CHECK-NEXT:    [[TMP0:%.*]] = catchswitch within none [label %catch] unwind to caller
 ; CHECK:       catch:
-; CHECK-NEXT:    [[TMP1:%.*]] = catchpad within [[TMP0]] [i8* null, i32 64, i8* null]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = catchpad within [[TMP0]] [ptr null, i32 64, ptr null]
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    [[TMP6:%.*]] = fmul <2 x double> [[TMP3]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP6]]) [ "funclet"(token [[TMP1]]) ]
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast double* [[C:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP7]], ptr [[C:%.*]], align 8
 ; CHECK-NEXT:    catchret from [[TMP1]] to label [[TRY_CONT:%.*]]
 ; CHECK:       try.cont:
 ; CHECK-NEXT:    ret void
@@ -27,27 +24,27 @@ define void @test1(double* %a, double* %b, double* %c) #0 personality i32 (...)*
 ; CHECK-NEXT:    unreachable
 ;
 entry:
-  invoke void @_CxxThrowException(i8* null, i8* null)
+  invoke void @_CxxThrowException(ptr null, ptr null)
   to label %unreachable unwind label %catch.dispatch
 
 catch.dispatch:                                   ; preds = %entry
   %0 = catchswitch within none [label %catch] unwind to caller
 
 catch:                                            ; preds = %catch.dispatch
-  %1 = catchpad within %0 [i8* null, i32 64, i8* null]
-  %i0 = load double, double* %a, align 8
-  %i1 = load double, double* %b, align 8
+  %1 = catchpad within %0 [ptr null, i32 64, ptr null]
+  %i0 = load double, ptr %a, align 8
+  %i1 = load double, ptr %b, align 8
   %mul = fmul double %i0, %i1
   %call = tail call double @floor(double %mul) #1 [ "funclet"(token %1) ]
-  %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double, double* %arrayidx3, align 8
-  %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double, double* %arrayidx4, align 8
+  %arrayidx3 = getelementptr inbounds double, ptr %a, i64 1
+  %i3 = load double, ptr %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double, ptr %b, i64 1
+  %i4 = load double, ptr %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   %call5 = tail call double @floor(double %mul5) #1 [ "funclet"(token %1) ]
-  store double %call, double* %c, align 8
-  %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
-  store double %call5, double* %arrayidx5, align 8
+  store double %call, ptr %c, align 8
+  %arrayidx5 = getelementptr inbounds double, ptr %c, i64 1
+  store double %call5, ptr %arrayidx5, align 8
   catchret from %1 to label %try.cont
 
 try.cont:                                         ; preds = %for.cond.cleanup
@@ -57,7 +54,7 @@ unreachable:                                      ; preds = %entry
   unreachable
 }
 
-declare x86_stdcallcc void @_CxxThrowException(i8*, i8*)
+declare x86_stdcallcc void @_CxxThrowException(ptr, ptr)
 
 declare i32 @__CxxFrameHandler3(...)
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll
index b0ebe40bc6039..af78274cd994b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll
@@ -12,10 +12,8 @@ define void @test(i16 %0) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>, <4 x i32> [[TMP5]], <4 x i32> <i32 4, i32 1, i32 6, i32 3>
 ; CHECK-NEXT:    br label [[FOR_BODY92:%.*]]
 ; CHECK:       for.body92:
-; CHECK-NEXT:    [[SUM_MVR_I:%.*]] = getelementptr i32, i32* undef, i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32* [[SUM_MVR_I]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 8
+; CHECK-NEXT:    store <4 x i32> [[TMP7]], ptr undef, align 8
 ; CHECK-NEXT:    br label [[FOR_BODY92]]
 ;
 for.body92.preheader:
@@ -23,18 +21,17 @@ for.body92.preheader:
 
 for.body92:
   %conv177.i = sext i16 0 to i32
-  %sum_mvr.i = getelementptr i32, i32* undef, i32 0
   %add178.i = add nsw i32 0, %conv177.i
-  store i32 %add178.i, i32* %sum_mvr.i, align 8
+  store i32 %add178.i, ptr undef, align 8
   %1 = zext i16 %0 to i32
-  %sum_mvr_abs.i = getelementptr i32, i32* undef, i32 2
+  %sum_mvr_abs.i = getelementptr i32, ptr undef, i32 2
   %add182.i = add nsw i32 0, %1
-  store i32 %add182.i, i32* %sum_mvr_abs.i, align 8
-  %sum_mvc.i = getelementptr i32, i32* undef, i32 1
+  store i32 %add182.i, ptr %sum_mvr_abs.i, align 8
+  %sum_mvc.i = getelementptr i32, ptr undef, i32 1
   %add184.i = add nsw i32 0, 0
-  store i32 %add184.i, i32* %sum_mvc.i, align 4
-  %sum_mvc_abs.i = getelementptr i32, i32* undef, i32 3
+  store i32 %add184.i, ptr %sum_mvc.i, align 4
+  %sum_mvc_abs.i = getelementptr i32, ptr undef, i32 3
   %add188.i = add nsw i32 0, 0
-  store i32 %add188.i, i32* %sum_mvc_abs.i, align 4
+  store i32 %add188.i, ptr %sum_mvc_abs.i, align 4
   br label %for.body92
 }

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/gep.ll b/llvm/test/Transforms/SLPVectorizer/X86/gep.ll
index f392513b19e59..17f5abada632d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gep.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gep.ll
@@ -9,55 +9,53 @@ target triple = "x86_64-unknown-unknown"
 ;   x->first  = y->first  + 16
 ;   x->second = y->second + 16
 
-define void @foo1 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y) {
+define void @foo1 (ptr noalias %x, ptr noalias %y) {
 ; CHECK-LABEL: @foo1(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* [[Y:%.*]], i64 0, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* [[X:%.*]], i64 0, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32** [[TMP1]] to <2 x i32*>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32*>, <2 x i32*>* [[TMP3]], align 8
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, <2 x i32*> [[TMP4]], <2 x i64> <i64 16, i64 16>
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32** [[TMP2]] to <2 x i32*>*
-; CHECK-NEXT:    store <2 x i32*> [[TMP5]], <2 x i32*>* [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds { ptr, ptr }, ptr [[Y:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds { ptr, ptr }, ptr [[X:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x ptr>, ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, <2 x ptr> [[TMP4]], <2 x i64> <i64 16, i64 16>
+; CHECK-NEXT:    store <2 x ptr> [[TMP5]], ptr [[TMP2]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %1 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %y, i64 0, i32 0
-  %2 = load i32*, i32** %1, align 8
-  %3 = getelementptr inbounds i32, i32* %2, i64 16
-  %4 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %x, i64 0, i32 0
-  store i32* %3, i32** %4, align 8
-  %5 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %y, i64 0, i32 1
-  %6 = load i32*, i32** %5, align 8
-  %7 = getelementptr inbounds i32, i32* %6, i64 16
-  %8 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %x, i64 0, i32 1
-  store i32* %7, i32** %8, align 8
+  %1 = getelementptr inbounds { ptr, ptr }, ptr %y, i64 0, i32 0
+  %2 = load ptr, ptr %1, align 8
+  %3 = getelementptr inbounds i32, ptr %2, i64 16
+  %4 = getelementptr inbounds { ptr, ptr }, ptr %x, i64 0, i32 0
+  store ptr %3, ptr %4, align 8
+  %5 = getelementptr inbounds { ptr, ptr }, ptr %y, i64 0, i32 1
+  %6 = load ptr, ptr %5, align 8
+  %7 = getelementptr inbounds i32, ptr %6, i64 16
+  %8 = getelementptr inbounds { ptr, ptr }, ptr %x, i64 0, i32 1
+  store ptr %7, ptr %8, align 8
   ret void
 }
 
 ; Test that we don't vectorize GEP expressions if indexes are not constants.
 ; We can't produce efficient code in that case.
-define void @foo2 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y, i32 %i) {
+define void @foo2 (ptr noalias %x, ptr noalias %y, i32 %i) {
 ; CHECK-LABEL: @foo2(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* [[Y:%.*]], i64 0, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[I:%.*]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* [[X:%.*]], i64 0, i32 0
-; CHECK-NEXT:    store i32* [[TMP3]], i32** [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* [[Y]], i64 0, i32 1
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 [[I]]
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* [[X]], i64 0, i32 1
-; CHECK-NEXT:    store i32* [[TMP7]], i32** [[TMP8]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds { ptr, ptr }, ptr [[Y:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[I:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds { ptr, ptr }, ptr [[X:%.*]], i64 0, i32 0
+; CHECK-NEXT:    store ptr [[TMP3]], ptr [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds { ptr, ptr }, ptr [[Y]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[I]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds { ptr, ptr }, ptr [[X]], i64 0, i32 1
+; CHECK-NEXT:    store ptr [[TMP7]], ptr [[TMP8]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %1 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %y, i64 0, i32 0
-  %2 = load i32*, i32** %1, align 8
-  %3 = getelementptr inbounds i32, i32* %2, i32 %i
-  %4 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %x, i64 0, i32 0
-  store i32* %3, i32** %4, align 8
-  %5 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %y, i64 0, i32 1
-  %6 = load i32*, i32** %5, align 8
-  %7 = getelementptr inbounds i32, i32* %6, i32 %i
-  %8 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %x, i64 0, i32 1
-  store i32* %7, i32** %8, align 8
+  %1 = getelementptr inbounds { ptr, ptr }, ptr %y, i64 0, i32 0
+  %2 = load ptr, ptr %1, align 8
+  %3 = getelementptr inbounds i32, ptr %2, i32 %i
+  %4 = getelementptr inbounds { ptr, ptr }, ptr %x, i64 0, i32 0
+  store ptr %3, ptr %4, align 8
+  %5 = getelementptr inbounds { ptr, ptr }, ptr %y, i64 0, i32 1
+  %6 = load ptr, ptr %5, align 8
+  %7 = getelementptr inbounds i32, ptr %6, i32 %i
+  %8 = getelementptr inbounds { ptr, ptr }, ptr %x, i64 0, i32 1
+  store ptr %7, ptr %8, align 8
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gep_mismatch.ll b/llvm/test/Transforms/SLPVectorizer/X86/gep_mismatch.ll
index 1319ad8fe77db..1225ab7c9619e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gep_mismatch.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gep_mismatch.ll
@@ -11,11 +11,11 @@ define void @foo() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[BB1:%.*]]
 ; CHECK:       bb1:
-; CHECK-NEXT:    [[LS1_PH:%.*]] = phi float* [ [[_TMP1:%.*]], [[BB1]] ], [ undef, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[LS2_PH:%.*]] = phi float* [ [[_TMP2:%.*]], [[BB1]] ], [ undef, [[ENTRY]] ]
-; CHECK-NEXT:    store float undef, float* [[LS1_PH]], align 4
-; CHECK-NEXT:    [[_TMP1]] = getelementptr float, float* [[LS1_PH]], i32 1
-; CHECK-NEXT:    [[_TMP2]] = getelementptr float, float* [[LS2_PH]], i64 4
+; CHECK-NEXT:    [[LS1_PH:%.*]] = phi ptr [ [[_TMP1:%.*]], [[BB1]] ], [ undef, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[LS2_PH:%.*]] = phi ptr [ [[_TMP2:%.*]], [[BB1]] ], [ undef, [[ENTRY]] ]
+; CHECK-NEXT:    store float undef, ptr [[LS1_PH]], align 4
+; CHECK-NEXT:    [[_TMP1]] = getelementptr float, ptr [[LS1_PH]], i32 1
+; CHECK-NEXT:    [[_TMP2]] = getelementptr float, ptr [[LS2_PH]], i64 4
 ; CHECK-NEXT:    br i1 false, label [[BB1]], label [[BB2:%.*]]
 ; CHECK:       bb2:
 ; CHECK-NEXT:    ret void
@@ -24,11 +24,11 @@ entry:
   br label %bb1
 
 bb1:
-  %ls1.ph = phi float* [ %_tmp1, %bb1 ], [ undef, %entry ]
-  %ls2.ph = phi float* [ %_tmp2, %bb1 ], [ undef, %entry ]
-  store float undef, float* %ls1.ph
-  %_tmp1 = getelementptr float, float* %ls1.ph, i32 1
-  %_tmp2 = getelementptr float, float* %ls2.ph, i64 4
+  %ls1.ph = phi ptr [ %_tmp1, %bb1 ], [ undef, %entry ]
+  %ls2.ph = phi ptr [ %_tmp2, %bb1 ], [ undef, %entry ]
+  store float undef, ptr %ls1.ph
+  %_tmp1 = getelementptr float, ptr %ls1.ph, i32 1
+  %_tmp2 = getelementptr float, ptr %ls2.ph, i64 4
   br i1 false, label %bb1, label %bb2
 
 bb2:

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll
index f5c3b1aabf71a..c83ae53c9f03d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll
@@ -7,85 +7,85 @@
 define dso_local i32 @g() local_unnamed_addr {
 ; CHECK-LABEL: @g(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @e, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @e, align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT19:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT19]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
 ; CHECK:       while.body:
-; CHECK-NEXT:    [[C_022:%.*]] = phi i32* [ [[C_022_BE:%.*]], [[WHILE_BODY_BACKEDGE:%.*]] ], [ undef, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32*> [ [[TMP14:%.*]], [[WHILE_BODY_BACKEDGE]] ], [ undef, [[ENTRY]] ]
-; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[C_022]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint i32* [[C_022]] to i64
+; CHECK-NEXT:    [[C_022:%.*]] = phi ptr [ [[C_022_BE:%.*]], [[WHILE_BODY_BACKEDGE:%.*]] ], [ undef, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x ptr> [ [[TMP14:%.*]], [[WHILE_BODY_BACKEDGE]] ], [ undef, [[ENTRY]] ]
+; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[C_022]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[C_022]] to i64
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i32, <2 x i32*> [[TMP1]], <2 x i64> <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> <i64 1, i64 1>
 ; CHECK-NEXT:    switch i32 [[TMP3]], label [[WHILE_BODY_BACKEDGE]] [
 ; CHECK-NEXT:    i32 2, label [[SW_BB:%.*]]
 ; CHECK-NEXT:    i32 4, label [[SW_BB6:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       sw.bb:
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i32*> [[TMP4]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint i32* [[TMP5]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x ptr> [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
 ; CHECK-NEXT:    [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, <2 x i32*> [[TMP1]], <2 x i64> <i64 2, i64 2>
-; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i32*> [[TMP4]], i32 1
-; CHECK-NEXT:    store i32 [[TMP7]], i32* [[TMP9]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, i32* [[C_022]], i64 2
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> <i64 2, i64 2>
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x ptr> [[TMP4]], i32 1
+; CHECK-NEXT:    store i32 [[TMP7]], ptr [[TMP9]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[C_022]], i64 2
 ; CHECK-NEXT:    br label [[WHILE_BODY_BACKEDGE]]
 ; CHECK:       sw.bb6:
-; CHECK-NEXT:    [[INCDEC_PTR8:%.*]] = getelementptr inbounds i32, i32* [[C_022]], i64 2
-; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint i32* [[INCDEC_PTR]] to i64
+; CHECK-NEXT:    [[INCDEC_PTR8:%.*]] = getelementptr inbounds i32, ptr [[C_022]], i64 2
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[INCDEC_PTR]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = trunc i64 [[TMP10]] to i32
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i32, <2 x i32*> [[TMP1]], <2 x i64> <i64 2, i64 2>
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i32*> [[TMP4]], i32 0
-; CHECK-NEXT:    store i32 [[TMP11]], i32* [[TMP13]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> <i64 2, i64 2>
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x ptr> [[TMP4]], i32 0
+; CHECK-NEXT:    store i32 [[TMP11]], ptr [[TMP13]], align 4
 ; CHECK-NEXT:    br label [[WHILE_BODY_BACKEDGE]]
 ; CHECK:       while.body.backedge:
-; CHECK-NEXT:    [[C_022_BE]] = phi i32* [ [[INCDEC_PTR]], [[WHILE_BODY]] ], [ [[INCDEC_PTR8]], [[SW_BB6]] ], [ [[INCDEC_PTR5]], [[SW_BB]] ]
-; CHECK-NEXT:    [[TMP14]] = phi <2 x i32*> [ [[TMP4]], [[WHILE_BODY]] ], [ [[TMP12]], [[SW_BB6]] ], [ [[TMP8]], [[SW_BB]] ]
+; CHECK-NEXT:    [[C_022_BE]] = phi ptr [ [[INCDEC_PTR]], [[WHILE_BODY]] ], [ [[INCDEC_PTR8]], [[SW_BB6]] ], [ [[INCDEC_PTR5]], [[SW_BB]] ]
+; CHECK-NEXT:    [[TMP14]] = phi <2 x ptr> [ [[TMP4]], [[WHILE_BODY]] ], [ [[TMP12]], [[SW_BB6]] ], [ [[TMP8]], [[SW_BB]] ]
 ; CHECK-NEXT:    br label [[WHILE_BODY]]
 ; CHECK:       while.end:
 ; CHECK-NEXT:    ret i32 undef
 ;
 entry:
-  %0 = load i32, i32* @e, align 4
+  %0 = load i32, ptr @e, align 4
   %tobool.not19 = icmp eq i32 %0, 0
   br i1 %tobool.not19, label %while.end, label %while.body
 
 while.body:                                       ; preds = %entry, %while.body.backedge
-  %c.022 = phi i32* [ %c.022.be, %while.body.backedge ], [ undef, %entry ]
-  %b.021 = phi i32* [ %b.021.be, %while.body.backedge ], [ undef, %entry ]
-  %a.020 = phi i32* [ %a.020.be, %while.body.backedge ], [ undef, %entry ]
-  %incdec.ptr = getelementptr inbounds i32, i32* %c.022, i64 1
-  %1 = ptrtoint i32* %c.022 to i64
+  %c.022 = phi ptr [ %c.022.be, %while.body.backedge ], [ undef, %entry ]
+  %b.021 = phi ptr [ %b.021.be, %while.body.backedge ], [ undef, %entry ]
+  %a.020 = phi ptr [ %a.020.be, %while.body.backedge ], [ undef, %entry ]
+  %incdec.ptr = getelementptr inbounds i32, ptr %c.022, i64 1
+  %1 = ptrtoint ptr %c.022 to i64
   %2 = trunc i64 %1 to i32
-  %incdec.ptr1 = getelementptr inbounds i32, i32* %a.020, i64 1
-  %incdec.ptr2 = getelementptr inbounds i32, i32* %b.021, i64 1
+  %incdec.ptr1 = getelementptr inbounds i32, ptr %a.020, i64 1
+  %incdec.ptr2 = getelementptr inbounds i32, ptr %b.021, i64 1
   switch i32 %2, label %while.body.backedge [
   i32 2, label %sw.bb
   i32 4, label %sw.bb6
   ]
 
 sw.bb:                                            ; preds = %while.body
-  %incdec.ptr3 = getelementptr inbounds i32, i32* %b.021, i64 2
-  %3 = ptrtoint i32* %incdec.ptr2 to i64
+  %incdec.ptr3 = getelementptr inbounds i32, ptr %b.021, i64 2
+  %3 = ptrtoint ptr %incdec.ptr2 to i64
   %4 = trunc i64 %3 to i32
-  %incdec.ptr4 = getelementptr inbounds i32, i32* %a.020, i64 2
-  store i32 %4, i32* %incdec.ptr1, align 4
-  %incdec.ptr5 = getelementptr inbounds i32, i32* %c.022, i64 2
+  %incdec.ptr4 = getelementptr inbounds i32, ptr %a.020, i64 2
+  store i32 %4, ptr %incdec.ptr1, align 4
+  %incdec.ptr5 = getelementptr inbounds i32, ptr %c.022, i64 2
   br label %while.body.backedge
 
 sw.bb6:                                           ; preds = %while.body
-  %incdec.ptr7 = getelementptr inbounds i32, i32* %a.020, i64 2
-  %incdec.ptr8 = getelementptr inbounds i32, i32* %c.022, i64 2
-  %5 = ptrtoint i32* %incdec.ptr to i64
+  %incdec.ptr7 = getelementptr inbounds i32, ptr %a.020, i64 2
+  %incdec.ptr8 = getelementptr inbounds i32, ptr %c.022, i64 2
+  %5 = ptrtoint ptr %incdec.ptr to i64
   %6 = trunc i64 %5 to i32
-  %incdec.ptr9 = getelementptr inbounds i32, i32* %b.021, i64 2
-  store i32 %6, i32* %incdec.ptr2, align 4
+  %incdec.ptr9 = getelementptr inbounds i32, ptr %b.021, i64 2
+  store i32 %6, ptr %incdec.ptr2, align 4
   br label %while.body.backedge
 
 while.body.backedge:                              ; preds = %sw.bb6, %while.body, %sw.bb
-  %c.022.be = phi i32* [ %incdec.ptr, %while.body ], [ %incdec.ptr8, %sw.bb6 ], [ %incdec.ptr5, %sw.bb ]
-  %b.021.be = phi i32* [ %incdec.ptr2, %while.body ], [ %incdec.ptr9, %sw.bb6 ], [ %incdec.ptr3, %sw.bb ]
-  %a.020.be = phi i32* [ %incdec.ptr1, %while.body ], [ %incdec.ptr7, %sw.bb6 ], [ %incdec.ptr4, %sw.bb ]
+  %c.022.be = phi ptr [ %incdec.ptr, %while.body ], [ %incdec.ptr8, %sw.bb6 ], [ %incdec.ptr5, %sw.bb ]
+  %b.021.be = phi ptr [ %incdec.ptr2, %while.body ], [ %incdec.ptr9, %sw.bb6 ], [ %incdec.ptr3, %sw.bb ]
+  %a.020.be = phi ptr [ %incdec.ptr1, %while.body ], [ %incdec.ptr7, %sw.bb6 ], [ %incdec.ptr4, %sw.bb ]
   br label %while.body
 
 while.end:                                        ; preds = %entry

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll
index fb1e08afd776d..822db3a2f6ea5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll
@@ -141,24 +141,23 @@ define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) {
 }
 
 ; PR41892
-define void @test_v4f32_v2f32_store(<4 x float> %f, float* %p){
+define void @test_v4f32_v2f32_store(<4 x float> %f, ptr %p){
 ; CHECK-LABEL: @test_v4f32_v2f32_store(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[F:%.*]], <4 x float> undef, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[F]], <4 x float> undef, <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x float> [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[P:%.*]] to <2 x float>*
-; CHECK-NEXT:    store <2 x float> [[TMP3]], <2 x float>* [[TMP4]], align 4
+; CHECK-NEXT:    store <2 x float> [[TMP3]], ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %x0 = extractelement <4 x float> %f, i64 0
   %x1 = extractelement <4 x float> %f, i64 1
   %add01 = fadd float %x0, %x1
-  store float %add01, float* %p, align 4
+  store float %add01, ptr %p, align 4
   %x2 = extractelement <4 x float> %f, i64 2
   %x3 = extractelement <4 x float> %f, i64 3
   %add23 = fadd float %x2, %x3
-  %p23 = getelementptr inbounds float, float* %p, i64 1
-  store float %add23, float * %p23, align 4
+  %p23 = getelementptr inbounds float, ptr %p, i64 1
+  store float %add23, ptr %p23, align 4
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll
index 8e72c0565ca19..8fd490a9e38eb 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll
@@ -141,24 +141,23 @@ define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) {
 }
 
 ; PR41892
-define void @test_v4f32_v2f32_store(<4 x float> %f, float* %p){
+define void @test_v4f32_v2f32_store(<4 x float> %f, ptr %p){
 ; CHECK-LABEL: @test_v4f32_v2f32_store(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[F:%.*]], <4 x float> undef, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[F]], <4 x float> undef, <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x float> [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[P:%.*]] to <2 x float>*
-; CHECK-NEXT:    store <2 x float> [[TMP3]], <2 x float>* [[TMP4]], align 4
+; CHECK-NEXT:    store <2 x float> [[TMP3]], ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %x0 = extractelement <4 x float> %f, i64 0
   %x1 = extractelement <4 x float> %f, i64 1
   %add01 = fadd float %x0, %x1
-  store float %add01, float* %p, align 4
+  store float %add01, ptr %p, align 4
   %x2 = extractelement <4 x float> %f, i64 2
   %x3 = extractelement <4 x float> %f, i64 3
   %add23 = fadd float %x2, %x3
-  %p23 = getelementptr inbounds float, float* %p, i64 1
-  store float %add23, float * %p23, align 4
+  %p23 = getelementptr inbounds float, ptr %p, i64 1
+  store float %add23, ptr %p23, align 4
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll b/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll
index c17b2d9852e65..8290cfee3c3c8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll
@@ -13,7 +13,7 @@ target triple = "i386-apple-macosx10.9.0"
 ;  }
 ;}
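 ; In the vectorized form checked below, the four interleaved load/add/store
 ; groups of the scalar loop body are packed into a single <4 x i32> load,
 ; add and store per iteration.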
 
-define i32 @foo(i32* nocapture %A, i32 %n, i32 %k) {
+define i32 @foo(ptr nocapture %A, i32 %n, i32 %k) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
@@ -22,12 +22,10 @@ define i32 @foo(i32* nocapture %A, i32 %n, i32 %k) {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_024:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD10:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_024]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I_024]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[ADD10]] = add nsw i32 [[I_024]], 4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[ADD10]], 10000
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
@@ -39,25 +37,25 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.024 = phi i32 [ 0, %entry ], [ %add10, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.024
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.024
+  %0 = load i32, ptr %arrayidx, align 4
   %add = add nsw i32 %0, %n
-  store i32 %add, i32* %arrayidx, align 4
+  store i32 %add, ptr %arrayidx, align 4
   %add121 = or i32 %i.024, 1
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %add121
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i32 %add121
+  %1 = load i32, ptr %arrayidx2, align 4
   %add3 = add nsw i32 %1, %k
-  store i32 %add3, i32* %arrayidx2, align 4
+  store i32 %add3, ptr %arrayidx2, align 4
   %add422 = or i32 %i.024, 2
-  %arrayidx5 = getelementptr inbounds i32, i32* %A, i32 %add422
-  %2 = load i32, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %A, i32 %add422
+  %2 = load i32, ptr %arrayidx5, align 4
   %add6 = add nsw i32 %2, %n
-  store i32 %add6, i32* %arrayidx5, align 4
+  store i32 %add6, ptr %arrayidx5, align 4
   %add723 = or i32 %i.024, 3
-  %arrayidx8 = getelementptr inbounds i32, i32* %A, i32 %add723
-  %3 = load i32, i32* %arrayidx8, align 4
+  %arrayidx8 = getelementptr inbounds i32, ptr %A, i32 %add723
+  %3 = load i32, ptr %arrayidx8, align 4
   %add9 = add nsw i32 %3, %k
-  store i32 %add9, i32* %arrayidx8, align 4
+  store i32 %add9, ptr %arrayidx8, align 4
   %add10 = add nsw i32 %i.024, 4
   %cmp = icmp slt i32 %add10, 10000
   br i1 %cmp, label %for.body, label %for.end

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
index cd113e89ada0e..6ab1f86705489 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
@@ -10,52 +10,52 @@
 define float @baz() {
 ; CHECK-LABEL: @baz(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @n, align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
 ; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @arr, align 16
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @arr1, align 16
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
 ; CHECK-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[SHUFFLE]])
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[CONV]]
 ; CHECK-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV]]
-; CHECK-NEXT:    store float [[OP_RDX1]], float* @res, align 4
+; CHECK-NEXT:    store float [[OP_RDX1]], ptr @res, align 4
 ; CHECK-NEXT:    ret float [[OP_RDX1]]
 ;
 ; THRESHOLD-LABEL: @baz(
 ; THRESHOLD-NEXT:  entry:
-; THRESHOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
+; THRESHOLD-NEXT:    [[TMP0:%.*]] = load i32, ptr @n, align 4
 ; THRESHOLD-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
 ; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
-; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
-; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
+; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @arr, align 16
+; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @arr1, align 16
 ; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
 ; THRESHOLD-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
 ; THRESHOLD-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[SHUFFLE]])
 ; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[CONV]]
 ; THRESHOLD-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV]]
-; THRESHOLD-NEXT:    store float [[OP_RDX1]], float* @res, align 4
+; THRESHOLD-NEXT:    store float [[OP_RDX1]], ptr @res, align 4
 ; THRESHOLD-NEXT:    ret float [[OP_RDX1]]
 ;
 entry:
-  %0 = load i32, i32* @n, align 4
+  %0 = load i32, ptr @n, align 4
   %mul = mul nsw i32 %0, 3
   %conv = sitofp i32 %mul to float
-  %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
-  %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
+  %1 = load float, ptr @arr, align 16
+  %2 = load float, ptr @arr1, align 16
   %mul4 = fmul fast float %2, %1
   %add = fadd fast float %mul4, %conv
-  %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
-  %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
+  %3 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 1), align 4
+  %4 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 1), align 4
   %mul4.1 = fmul fast float %4, %3
   %add.1 = fadd fast float %mul4.1, %add
-  %5 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
-  %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
+  %5 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
+  %6 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
   %mul4.2 = fmul fast float %6, %5
   %add.2 = fadd fast float %mul4.2, %add.1
-  %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
-  %8 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
+  %7 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
+  %8 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
   %mul4.3 = fmul fast float %8, %7
   %add.3 = fadd fast float %mul4.3, %add.2
   %add7 = fadd fast float %add.3, %conv
@@ -63,182 +63,182 @@ entry:
   %add19.1 = fadd fast float %mul4.1, %add19
   %add19.2 = fadd fast float %mul4.2, %add19.1
   %add19.3 = fadd fast float %mul4.3, %add19.2
-  store float %add19.3, float* @res, align 4
+  store float %add19.3, ptr @res, align 4
   ret float %add19.3
 }
 
 define float @bazz() {
 ; CHECK-LABEL: @bazz(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @n, align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
 ; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
 ; CHECK-NEXT:    [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
 ; CHECK-NEXT:    [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16
-; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @arr, align 16
+; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x float>, ptr @arr1, align 16
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP3]])
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[CONV]]
 ; CHECK-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV6]]
-; CHECK-NEXT:    store float [[OP_RDX1]], float* @res, align 4
+; CHECK-NEXT:    store float [[OP_RDX1]], ptr @res, align 4
 ; CHECK-NEXT:    ret float [[OP_RDX1]]
 ;
 ; THRESHOLD-LABEL: @bazz(
 ; THRESHOLD-NEXT:  entry:
-; THRESHOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
+; THRESHOLD-NEXT:    [[TMP0:%.*]] = load i32, ptr @n, align 4
 ; THRESHOLD-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
 ; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
 ; THRESHOLD-NEXT:    [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
 ; THRESHOLD-NEXT:    [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
-; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16
-; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16
+; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @arr, align 16
+; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <8 x float>, ptr @arr1, align 16
 ; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
 ; THRESHOLD-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP3]])
 ; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[CONV]]
 ; THRESHOLD-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[CONV6]]
-; THRESHOLD-NEXT:    store float [[OP_RDX1]], float* @res, align 4
+; THRESHOLD-NEXT:    store float [[OP_RDX1]], ptr @res, align 4
 ; THRESHOLD-NEXT:    ret float [[OP_RDX1]]
 ;
 entry:
-  %0 = load i32, i32* @n, align 4
+  %0 = load i32, ptr @n, align 4
   %mul = mul nsw i32 %0, 3
   %conv = sitofp i32 %mul to float
-  %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
-  %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
+  %1 = load float, ptr @arr, align 16
+  %2 = load float, ptr @arr1, align 16
   %mul4 = fmul fast float %2, %1
   %add = fadd fast float %mul4, %conv
-  %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
-  %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
+  %3 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 1), align 4
+  %4 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 1), align 4
   %mul4.1 = fmul fast float %4, %3
   %add.1 = fadd fast float %mul4.1, %add
-  %5 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
-  %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
+  %5 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
+  %6 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
   %mul4.2 = fmul fast float %6, %5
   %add.2 = fadd fast float %mul4.2, %add.1
-  %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
-  %8 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
+  %7 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
+  %8 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
   %mul4.3 = fmul fast float %8, %7
   %add.3 = fadd fast float %mul4.3, %add.2
   %mul5 = shl nsw i32 %0, 2
   %conv6 = sitofp i32 %mul5 to float
   %add7 = fadd fast float %add.3, %conv6
-  %9 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 4), align 16
-  %10 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 4), align 16
+  %9 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 4), align 16
+  %10 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 4), align 16
   %mul18 = fmul fast float %10, %9
   %add19 = fadd fast float %mul18, %add7
-  %11 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 5), align 4
-  %12 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 5), align 4
+  %11 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 5), align 4
+  %12 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 5), align 4
   %mul18.1 = fmul fast float %12, %11
   %add19.1 = fadd fast float %mul18.1, %add19
-  %13 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 6), align 8
-  %14 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 6), align 8
+  %13 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 6), align 8
+  %14 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 6), align 8
   %mul18.2 = fmul fast float %14, %13
   %add19.2 = fadd fast float %mul18.2, %add19.1
-  %15 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 7), align 4
-  %16 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 7), align 4
+  %15 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 7), align 4
+  %16 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 7), align 4
   %mul18.3 = fmul fast float %16, %15
   %add19.3 = fadd fast float %mul18.3, %add19.2
-  store float %add19.3, float* @res, align 4
+  store float %add19.3, ptr @res, align 4
   ret float %add19.3
 }
 
 define float @bazzz() {
 ; CHECK-LABEL: @bazzz(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @n, align 4
 ; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @arr, align 16
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @arr1, align 16
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
-; CHECK-NEXT:    store float [[TMP5]], float* @res, align 4
+; CHECK-NEXT:    store float [[TMP5]], ptr @res, align 4
 ; CHECK-NEXT:    ret float [[TMP5]]
 ;
 ; THRESHOLD-LABEL: @bazzz(
 ; THRESHOLD-NEXT:  entry:
-; THRESHOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
+; THRESHOLD-NEXT:    [[TMP0:%.*]] = load i32, ptr @n, align 4
 ; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
-; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
-; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
+; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @arr, align 16
+; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @arr1, align 16
 ; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
 ; THRESHOLD-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
 ; THRESHOLD-NEXT:    [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
-; THRESHOLD-NEXT:    store float [[TMP5]], float* @res, align 4
+; THRESHOLD-NEXT:    store float [[TMP5]], ptr @res, align 4
 ; THRESHOLD-NEXT:    ret float [[TMP5]]
 ;
 entry:
-  %0 = load i32, i32* @n, align 4
+  %0 = load i32, ptr @n, align 4
   %conv = sitofp i32 %0 to float
-  %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
-  %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
+  %1 = load float, ptr @arr, align 16
+  %2 = load float, ptr @arr1, align 16
   %mul = fmul fast float %2, %1
-  %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
-  %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
+  %3 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 1), align 4
+  %4 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 1), align 4
   %mul.1 = fmul fast float %4, %3
   %5 = fadd fast float %mul.1, %mul
-  %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
-  %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
+  %6 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
+  %7 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
   %mul.2 = fmul fast float %7, %6
   %8 = fadd fast float %mul.2, %5
-  %9 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
-  %10 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
+  %9 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
+  %10 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
   %mul.3 = fmul fast float %10, %9
   %11 = fadd fast float %mul.3, %8
   %12 = fmul fast float %conv, %11
-  store float %12, float* @res, align 4
+  store float %12, ptr @res, align 4
   ret float %12
 }
 
 define i32 @foo() {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @n, align 4
 ; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @arr, align 16
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @arr1, align 16
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
 ; CHECK-NEXT:    [[CONV4:%.*]] = fptosi float [[TMP5]] to i32
-; CHECK-NEXT:    store i32 [[CONV4]], i32* @n, align 4
+; CHECK-NEXT:    store i32 [[CONV4]], ptr @n, align 4
 ; CHECK-NEXT:    ret i32 [[CONV4]]
 ;
 ; THRESHOLD-LABEL: @foo(
 ; THRESHOLD-NEXT:  entry:
-; THRESHOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
+; THRESHOLD-NEXT:    [[TMP0:%.*]] = load i32, ptr @n, align 4
 ; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
-; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
-; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
+; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @arr, align 16
+; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @arr1, align 16
 ; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
 ; THRESHOLD-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
 ; THRESHOLD-NEXT:    [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
 ; THRESHOLD-NEXT:    [[CONV4:%.*]] = fptosi float [[TMP5]] to i32
-; THRESHOLD-NEXT:    store i32 [[CONV4]], i32* @n, align 4
+; THRESHOLD-NEXT:    store i32 [[CONV4]], ptr @n, align 4
 ; THRESHOLD-NEXT:    ret i32 [[CONV4]]
 ;
 entry:
-  %0 = load i32, i32* @n, align 4
+  %0 = load i32, ptr @n, align 4
   %conv = sitofp i32 %0 to float
-  %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
-  %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
+  %1 = load float, ptr @arr, align 16
+  %2 = load float, ptr @arr1, align 16
   %mul = fmul fast float %2, %1
-  %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
-  %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
+  %3 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 1), align 4
+  %4 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 1), align 4
   %mul.1 = fmul fast float %4, %3
   %5 = fadd fast float %mul.1, %mul
-  %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
-  %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
+  %6 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
+  %7 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
   %mul.2 = fmul fast float %7, %6
   %8 = fadd fast float %mul.2, %5
-  %9 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
-  %10 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
+  %9 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
+  %10 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
   %mul.3 = fmul fast float %10, %9
   %11 = fadd fast float %mul.3, %8
   %12 = fmul fast float %conv, %11
   %conv4 = fptosi float %12 to i32
-  store i32 %conv4, i32* @n, align 4
+  store i32 %conv4, ptr @n, align 4
   ret i32 %conv4
 }
 
@@ -247,79 +247,77 @@ entry:
 define float @bar() {
 ; CHECK-LABEL: @bar(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr @arr, align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr @arr1, align 16
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP0]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
 ; CHECK-NEXT:    [[CMP4:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float [[TMP3]], float [[TMP4]]
-; CHECK-NEXT:    [[TMP5:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
-; CHECK-NEXT:    [[TMP6:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
 ; CHECK-NEXT:    [[MUL3_1:%.*]] = fmul fast float [[TMP6]], [[TMP5]]
 ; CHECK-NEXT:    [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], [[MUL3_1]]
 ; CHECK-NEXT:    [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float [[MUL3_1]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
-; CHECK-NEXT:    [[TMP8:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
 ; CHECK-NEXT:    [[MUL3_2:%.*]] = fmul fast float [[TMP8]], [[TMP7]]
 ; CHECK-NEXT:    [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], [[MUL3_2]]
 ; CHECK-NEXT:    [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float [[MUL3_2]]
-; CHECK-NEXT:    store float [[MAX_0_MUL3_2]], float* @res, align 4
+; CHECK-NEXT:    store float [[MAX_0_MUL3_2]], ptr @res, align 4
 ; CHECK-NEXT:    ret float [[MAX_0_MUL3_2]]
 ;
 ; THRESHOLD-LABEL: @bar(
 ; THRESHOLD-NEXT:  entry:
-; THRESHOLD-NEXT:    [[TMP0:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16
-; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16
+; THRESHOLD-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr @arr, align 16
+; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr @arr1, align 16
 ; THRESHOLD-NEXT:    [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP0]]
 ; THRESHOLD-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
 ; THRESHOLD-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
 ; THRESHOLD-NEXT:    [[CMP4:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
 ; THRESHOLD-NEXT:    [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float [[TMP3]], float [[TMP4]]
-; THRESHOLD-NEXT:    [[TMP5:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
-; THRESHOLD-NEXT:    [[TMP6:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
+; THRESHOLD-NEXT:    [[TMP5:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
+; THRESHOLD-NEXT:    [[TMP6:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
 ; THRESHOLD-NEXT:    [[MUL3_1:%.*]] = fmul fast float [[TMP6]], [[TMP5]]
 ; THRESHOLD-NEXT:    [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], [[MUL3_1]]
 ; THRESHOLD-NEXT:    [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float [[MUL3_1]]
-; THRESHOLD-NEXT:    [[TMP7:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
-; THRESHOLD-NEXT:    [[TMP8:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
+; THRESHOLD-NEXT:    [[TMP7:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
+; THRESHOLD-NEXT:    [[TMP8:%.*]] = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
 ; THRESHOLD-NEXT:    [[MUL3_2:%.*]] = fmul fast float [[TMP8]], [[TMP7]]
 ; THRESHOLD-NEXT:    [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], [[MUL3_2]]
 ; THRESHOLD-NEXT:    [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float [[MUL3_2]]
-; THRESHOLD-NEXT:    store float [[MAX_0_MUL3_2]], float* @res, align 4
+; THRESHOLD-NEXT:    store float [[MAX_0_MUL3_2]], ptr @res, align 4
 ; THRESHOLD-NEXT:    ret float [[MAX_0_MUL3_2]]
 ;
 entry:
-  %0 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
-  %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
+  %0 = load float, ptr @arr, align 16
+  %1 = load float, ptr @arr1, align 16
   %mul = fmul fast float %1, %0
-  %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
-  %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
+  %2 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 1), align 4
+  %3 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 1), align 4
   %mul3 = fmul fast float %3, %2
   %cmp4 = fcmp fast ogt float %mul, %mul3
   %max.0.mul3 = select i1 %cmp4, float %mul, float %mul3
-  %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
-  %5 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
+  %4 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 2), align 8
+  %5 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 2), align 8
   %mul3.1 = fmul fast float %5, %4
   %cmp4.1 = fcmp fast ogt float %max.0.mul3, %mul3.1
   %max.0.mul3.1 = select i1 %cmp4.1, float %max.0.mul3, float %mul3.1
-  %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
-  %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
+  %6 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr, i64 0, i64 3), align 4
+  %7 = load float, ptr getelementptr inbounds ([20 x float], ptr @arr1, i64 0, i64 3), align 4
   %mul3.2 = fmul fast float %7, %6
   %cmp4.2 = fcmp fast ogt float %max.0.mul3.1, %mul3.2
   %max.0.mul3.2 = select i1 %cmp4.2, float %max.0.mul3.1, float %mul3.2
-  store float %max.0.mul3.2, float* @res, align 4
+  store float %max.0.mul3.2, ptr @res, align 4
   ret float %max.0.mul3.2
 }
 
-define float @f(float* nocapture readonly %x) {
+define float @f(ptr nocapture readonly %x) {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <32 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4
-; CHECK-NEXT:    [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, float* [[X]], i64 32
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_32]] to <16 x float>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x float>, <16 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <32 x float>, ptr [[X:%.*]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, ptr [[X]], i64 32
+; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x float>, ptr [[ARRAYIDX_32]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP1]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP3]])
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
@@ -327,169 +325,166 @@ define float @f(float* nocapture readonly %x) {
 ;
 ; THRESHOLD-LABEL: @f(
 ; THRESHOLD-NEXT:  entry:
-; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <32 x float>*
-; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4
-; THRESHOLD-NEXT:    [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, float* [[X]], i64 32
-; THRESHOLD-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_32]] to <16 x float>*
-; THRESHOLD-NEXT:    [[TMP3:%.*]] = load <16 x float>, <16 x float>* [[TMP2]], align 4
+; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <32 x float>, ptr [[X:%.*]], align 4
+; THRESHOLD-NEXT:    [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, ptr [[X]], i64 32
+; THRESHOLD-NEXT:    [[TMP3:%.*]] = load <16 x float>, ptr [[ARRAYIDX_32]], align 4
 ; THRESHOLD-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP1]])
 ; THRESHOLD-NEXT:    [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP3]])
 ; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
 ; THRESHOLD-NEXT:    ret float [[OP_RDX]]
 ;
   entry:
-  %0 = load float, float* %x, align 4
-  %arrayidx.1 = getelementptr inbounds float, float* %x, i64 1
-  %1 = load float, float* %arrayidx.1, align 4
+  %0 = load float, ptr %x, align 4
+  %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
+  %1 = load float, ptr %arrayidx.1, align 4
   %add.1 = fadd fast float %1, %0
-  %arrayidx.2 = getelementptr inbounds float, float* %x, i64 2
-  %2 = load float, float* %arrayidx.2, align 4
+  %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
+  %2 = load float, ptr %arrayidx.2, align 4
   %add.2 = fadd fast float %2, %add.1
-  %arrayidx.3 = getelementptr inbounds float, float* %x, i64 3
-  %3 = load float, float* %arrayidx.3, align 4
+  %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
+  %3 = load float, ptr %arrayidx.3, align 4
   %add.3 = fadd fast float %3, %add.2
-  %arrayidx.4 = getelementptr inbounds float, float* %x, i64 4
-  %4 = load float, float* %arrayidx.4, align 4
+  %arrayidx.4 = getelementptr inbounds float, ptr %x, i64 4
+  %4 = load float, ptr %arrayidx.4, align 4
   %add.4 = fadd fast float %4, %add.3
-  %arrayidx.5 = getelementptr inbounds float, float* %x, i64 5
-  %5 = load float, float* %arrayidx.5, align 4
+  %arrayidx.5 = getelementptr inbounds float, ptr %x, i64 5
+  %5 = load float, ptr %arrayidx.5, align 4
   %add.5 = fadd fast float %5, %add.4
-  %arrayidx.6 = getelementptr inbounds float, float* %x, i64 6
-  %6 = load float, float* %arrayidx.6, align 4
+  %arrayidx.6 = getelementptr inbounds float, ptr %x, i64 6
+  %6 = load float, ptr %arrayidx.6, align 4
   %add.6 = fadd fast float %6, %add.5
-  %arrayidx.7 = getelementptr inbounds float, float* %x, i64 7
-  %7 = load float, float* %arrayidx.7, align 4
+  %arrayidx.7 = getelementptr inbounds float, ptr %x, i64 7
+  %7 = load float, ptr %arrayidx.7, align 4
   %add.7 = fadd fast float %7, %add.6
-  %arrayidx.8 = getelementptr inbounds float, float* %x, i64 8
-  %8 = load float, float* %arrayidx.8, align 4
+  %arrayidx.8 = getelementptr inbounds float, ptr %x, i64 8
+  %8 = load float, ptr %arrayidx.8, align 4
   %add.8 = fadd fast float %8, %add.7
-  %arrayidx.9 = getelementptr inbounds float, float* %x, i64 9
-  %9 = load float, float* %arrayidx.9, align 4
+  %arrayidx.9 = getelementptr inbounds float, ptr %x, i64 9
+  %9 = load float, ptr %arrayidx.9, align 4
   %add.9 = fadd fast float %9, %add.8
-  %arrayidx.10 = getelementptr inbounds float, float* %x, i64 10
-  %10 = load float, float* %arrayidx.10, align 4
+  %arrayidx.10 = getelementptr inbounds float, ptr %x, i64 10
+  %10 = load float, ptr %arrayidx.10, align 4
   %add.10 = fadd fast float %10, %add.9
-  %arrayidx.11 = getelementptr inbounds float, float* %x, i64 11
-  %11 = load float, float* %arrayidx.11, align 4
+  %arrayidx.11 = getelementptr inbounds float, ptr %x, i64 11
+  %11 = load float, ptr %arrayidx.11, align 4
   %add.11 = fadd fast float %11, %add.10
-  %arrayidx.12 = getelementptr inbounds float, float* %x, i64 12
-  %12 = load float, float* %arrayidx.12, align 4
+  %arrayidx.12 = getelementptr inbounds float, ptr %x, i64 12
+  %12 = load float, ptr %arrayidx.12, align 4
   %add.12 = fadd fast float %12, %add.11
-  %arrayidx.13 = getelementptr inbounds float, float* %x, i64 13
-  %13 = load float, float* %arrayidx.13, align 4
+  %arrayidx.13 = getelementptr inbounds float, ptr %x, i64 13
+  %13 = load float, ptr %arrayidx.13, align 4
   %add.13 = fadd fast float %13, %add.12
-  %arrayidx.14 = getelementptr inbounds float, float* %x, i64 14
-  %14 = load float, float* %arrayidx.14, align 4
+  %arrayidx.14 = getelementptr inbounds float, ptr %x, i64 14
+  %14 = load float, ptr %arrayidx.14, align 4
   %add.14 = fadd fast float %14, %add.13
-  %arrayidx.15 = getelementptr inbounds float, float* %x, i64 15
-  %15 = load float, float* %arrayidx.15, align 4
+  %arrayidx.15 = getelementptr inbounds float, ptr %x, i64 15
+  %15 = load float, ptr %arrayidx.15, align 4
   %add.15 = fadd fast float %15, %add.14
-  %arrayidx.16 = getelementptr inbounds float, float* %x, i64 16
-  %16 = load float, float* %arrayidx.16, align 4
+  %arrayidx.16 = getelementptr inbounds float, ptr %x, i64 16
+  %16 = load float, ptr %arrayidx.16, align 4
   %add.16 = fadd fast float %16, %add.15
-  %arrayidx.17 = getelementptr inbounds float, float* %x, i64 17
-  %17 = load float, float* %arrayidx.17, align 4
+  %arrayidx.17 = getelementptr inbounds float, ptr %x, i64 17
+  %17 = load float, ptr %arrayidx.17, align 4
   %add.17 = fadd fast float %17, %add.16
-  %arrayidx.18 = getelementptr inbounds float, float* %x, i64 18
-  %18 = load float, float* %arrayidx.18, align 4
+  %arrayidx.18 = getelementptr inbounds float, ptr %x, i64 18
+  %18 = load float, ptr %arrayidx.18, align 4
   %add.18 = fadd fast float %18, %add.17
-  %arrayidx.19 = getelementptr inbounds float, float* %x, i64 19
-  %19 = load float, float* %arrayidx.19, align 4
+  %arrayidx.19 = getelementptr inbounds float, ptr %x, i64 19
+  %19 = load float, ptr %arrayidx.19, align 4
   %add.19 = fadd fast float %19, %add.18
-  %arrayidx.20 = getelementptr inbounds float, float* %x, i64 20
-  %20 = load float, float* %arrayidx.20, align 4
+  %arrayidx.20 = getelementptr inbounds float, ptr %x, i64 20
+  %20 = load float, ptr %arrayidx.20, align 4
   %add.20 = fadd fast float %20, %add.19
-  %arrayidx.21 = getelementptr inbounds float, float* %x, i64 21
-  %21 = load float, float* %arrayidx.21, align 4
+  %arrayidx.21 = getelementptr inbounds float, ptr %x, i64 21
+  %21 = load float, ptr %arrayidx.21, align 4
   %add.21 = fadd fast float %21, %add.20
-  %arrayidx.22 = getelementptr inbounds float, float* %x, i64 22
-  %22 = load float, float* %arrayidx.22, align 4
+  %arrayidx.22 = getelementptr inbounds float, ptr %x, i64 22
+  %22 = load float, ptr %arrayidx.22, align 4
   %add.22 = fadd fast float %22, %add.21
-  %arrayidx.23 = getelementptr inbounds float, float* %x, i64 23
-  %23 = load float, float* %arrayidx.23, align 4
+  %arrayidx.23 = getelementptr inbounds float, ptr %x, i64 23
+  %23 = load float, ptr %arrayidx.23, align 4
   %add.23 = fadd fast float %23, %add.22
-  %arrayidx.24 = getelementptr inbounds float, float* %x, i64 24
-  %24 = load float, float* %arrayidx.24, align 4
+  %arrayidx.24 = getelementptr inbounds float, ptr %x, i64 24
+  %24 = load float, ptr %arrayidx.24, align 4
   %add.24 = fadd fast float %24, %add.23
-  %arrayidx.25 = getelementptr inbounds float, float* %x, i64 25
-  %25 = load float, float* %arrayidx.25, align 4
+  %arrayidx.25 = getelementptr inbounds float, ptr %x, i64 25
+  %25 = load float, ptr %arrayidx.25, align 4
   %add.25 = fadd fast float %25, %add.24
-  %arrayidx.26 = getelementptr inbounds float, float* %x, i64 26
-  %26 = load float, float* %arrayidx.26, align 4
+  %arrayidx.26 = getelementptr inbounds float, ptr %x, i64 26
+  %26 = load float, ptr %arrayidx.26, align 4
   %add.26 = fadd fast float %26, %add.25
-  %arrayidx.27 = getelementptr inbounds float, float* %x, i64 27
-  %27 = load float, float* %arrayidx.27, align 4
+  %arrayidx.27 = getelementptr inbounds float, ptr %x, i64 27
+  %27 = load float, ptr %arrayidx.27, align 4
   %add.27 = fadd fast float %27, %add.26
-  %arrayidx.28 = getelementptr inbounds float, float* %x, i64 28
-  %28 = load float, float* %arrayidx.28, align 4
+  %arrayidx.28 = getelementptr inbounds float, ptr %x, i64 28
+  %28 = load float, ptr %arrayidx.28, align 4
   %add.28 = fadd fast float %28, %add.27
-  %arrayidx.29 = getelementptr inbounds float, float* %x, i64 29
-  %29 = load float, float* %arrayidx.29, align 4
+  %arrayidx.29 = getelementptr inbounds float, ptr %x, i64 29
+  %29 = load float, ptr %arrayidx.29, align 4
   %add.29 = fadd fast float %29, %add.28
-  %arrayidx.30 = getelementptr inbounds float, float* %x, i64 30
-  %30 = load float, float* %arrayidx.30, align 4
+  %arrayidx.30 = getelementptr inbounds float, ptr %x, i64 30
+  %30 = load float, ptr %arrayidx.30, align 4
   %add.30 = fadd fast float %30, %add.29
-  %arrayidx.31 = getelementptr inbounds float, float* %x, i64 31
-  %31 = load float, float* %arrayidx.31, align 4
+  %arrayidx.31 = getelementptr inbounds float, ptr %x, i64 31
+  %31 = load float, ptr %arrayidx.31, align 4
   %add.31 = fadd fast float %31, %add.30
-  %arrayidx.32 = getelementptr inbounds float, float* %x, i64 32
-  %32 = load float, float* %arrayidx.32, align 4
+  %arrayidx.32 = getelementptr inbounds float, ptr %x, i64 32
+  %32 = load float, ptr %arrayidx.32, align 4
   %add.32 = fadd fast float %32, %add.31
-  %arrayidx.33 = getelementptr inbounds float, float* %x, i64 33
-  %33 = load float, float* %arrayidx.33, align 4
+  %arrayidx.33 = getelementptr inbounds float, ptr %x, i64 33
+  %33 = load float, ptr %arrayidx.33, align 4
   %add.33 = fadd fast float %33, %add.32
-  %arrayidx.34 = getelementptr inbounds float, float* %x, i64 34
-  %34 = load float, float* %arrayidx.34, align 4
+  %arrayidx.34 = getelementptr inbounds float, ptr %x, i64 34
+  %34 = load float, ptr %arrayidx.34, align 4
   %add.34 = fadd fast float %34, %add.33
-  %arrayidx.35 = getelementptr inbounds float, float* %x, i64 35
-  %35 = load float, float* %arrayidx.35, align 4
+  %arrayidx.35 = getelementptr inbounds float, ptr %x, i64 35
+  %35 = load float, ptr %arrayidx.35, align 4
   %add.35 = fadd fast float %35, %add.34
-  %arrayidx.36 = getelementptr inbounds float, float* %x, i64 36
-  %36 = load float, float* %arrayidx.36, align 4
+  %arrayidx.36 = getelementptr inbounds float, ptr %x, i64 36
+  %36 = load float, ptr %arrayidx.36, align 4
   %add.36 = fadd fast float %36, %add.35
-  %arrayidx.37 = getelementptr inbounds float, float* %x, i64 37
-  %37 = load float, float* %arrayidx.37, align 4
+  %arrayidx.37 = getelementptr inbounds float, ptr %x, i64 37
+  %37 = load float, ptr %arrayidx.37, align 4
   %add.37 = fadd fast float %37, %add.36
-  %arrayidx.38 = getelementptr inbounds float, float* %x, i64 38
-  %38 = load float, float* %arrayidx.38, align 4
+  %arrayidx.38 = getelementptr inbounds float, ptr %x, i64 38
+  %38 = load float, ptr %arrayidx.38, align 4
   %add.38 = fadd fast float %38, %add.37
-  %arrayidx.39 = getelementptr inbounds float, float* %x, i64 39
-  %39 = load float, float* %arrayidx.39, align 4
+  %arrayidx.39 = getelementptr inbounds float, ptr %x, i64 39
+  %39 = load float, ptr %arrayidx.39, align 4
   %add.39 = fadd fast float %39, %add.38
-  %arrayidx.40 = getelementptr inbounds float, float* %x, i64 40
-  %40 = load float, float* %arrayidx.40, align 4
+  %arrayidx.40 = getelementptr inbounds float, ptr %x, i64 40
+  %40 = load float, ptr %arrayidx.40, align 4
   %add.40 = fadd fast float %40, %add.39
-  %arrayidx.41 = getelementptr inbounds float, float* %x, i64 41
-  %41 = load float, float* %arrayidx.41, align 4
+  %arrayidx.41 = getelementptr inbounds float, ptr %x, i64 41
+  %41 = load float, ptr %arrayidx.41, align 4
   %add.41 = fadd fast float %41, %add.40
-  %arrayidx.42 = getelementptr inbounds float, float* %x, i64 42
-  %42 = load float, float* %arrayidx.42, align 4
+  %arrayidx.42 = getelementptr inbounds float, ptr %x, i64 42
+  %42 = load float, ptr %arrayidx.42, align 4
   %add.42 = fadd fast float %42, %add.41
-  %arrayidx.43 = getelementptr inbounds float, float* %x, i64 43
-  %43 = load float, float* %arrayidx.43, align 4
+  %arrayidx.43 = getelementptr inbounds float, ptr %x, i64 43
+  %43 = load float, ptr %arrayidx.43, align 4
   %add.43 = fadd fast float %43, %add.42
-  %arrayidx.44 = getelementptr inbounds float, float* %x, i64 44
-  %44 = load float, float* %arrayidx.44, align 4
+  %arrayidx.44 = getelementptr inbounds float, ptr %x, i64 44
+  %44 = load float, ptr %arrayidx.44, align 4
   %add.44 = fadd fast float %44, %add.43
-  %arrayidx.45 = getelementptr inbounds float, float* %x, i64 45
-  %45 = load float, float* %arrayidx.45, align 4
+  %arrayidx.45 = getelementptr inbounds float, ptr %x, i64 45
+  %45 = load float, ptr %arrayidx.45, align 4
   %add.45 = fadd fast float %45, %add.44
-  %arrayidx.46 = getelementptr inbounds float, float* %x, i64 46
-  %46 = load float, float* %arrayidx.46, align 4
+  %arrayidx.46 = getelementptr inbounds float, ptr %x, i64 46
+  %46 = load float, ptr %arrayidx.46, align 4
   %add.46 = fadd fast float %46, %add.45
-  %arrayidx.47 = getelementptr inbounds float, float* %x, i64 47
-  %47 = load float, float* %arrayidx.47, align 4
+  %arrayidx.47 = getelementptr inbounds float, ptr %x, i64 47
+  %47 = load float, ptr %arrayidx.47, align 4
   %add.47 = fadd fast float %47, %add.46
   ret float %add.47
 }
 
-define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) {
+define float @f1(ptr nocapture readonly %x, i32 %a, i32 %b) {
 ; CHECK-LABEL: @f1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[REM:%.*]] = srem i32 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[REM]] to float
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <32 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <32 x float>, ptr [[X:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP1]])
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[CONV]]
 ; CHECK-NEXT:    ret float [[OP_RDX]]
@@ -498,8 +493,7 @@ define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) {
 ; THRESHOLD-NEXT:  entry:
 ; THRESHOLD-NEXT:    [[REM:%.*]] = srem i32 [[A:%.*]], [[B:%.*]]
 ; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[REM]] to float
-; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <32 x float>*
-; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4
+; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <32 x float>, ptr [[X:%.*]], align 4
 ; THRESHOLD-NEXT:    [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float -0.000000e+00, <32 x float> [[TMP1]])
 ; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[CONV]]
 ; THRESHOLD-NEXT:    ret float [[OP_RDX]]
@@ -507,120 +501,117 @@ define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) {
   entry:
   %rem = srem i32 %a, %b
   %conv = sitofp i32 %rem to float
-  %0 = load float, float* %x, align 4
+  %0 = load float, ptr %x, align 4
   %add = fadd fast float %0, %conv
-  %arrayidx.1 = getelementptr inbounds float, float* %x, i64 1
-  %1 = load float, float* %arrayidx.1, align 4
+  %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
+  %1 = load float, ptr %arrayidx.1, align 4
   %add.1 = fadd fast float %1, %add
-  %arrayidx.2 = getelementptr inbounds float, float* %x, i64 2
-  %2 = load float, float* %arrayidx.2, align 4
+  %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
+  %2 = load float, ptr %arrayidx.2, align 4
   %add.2 = fadd fast float %2, %add.1
-  %arrayidx.3 = getelementptr inbounds float, float* %x, i64 3
-  %3 = load float, float* %arrayidx.3, align 4
+  %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
+  %3 = load float, ptr %arrayidx.3, align 4
   %add.3 = fadd fast float %3, %add.2
-  %arrayidx.4 = getelementptr inbounds float, float* %x, i64 4
-  %4 = load float, float* %arrayidx.4, align 4
+  %arrayidx.4 = getelementptr inbounds float, ptr %x, i64 4
+  %4 = load float, ptr %arrayidx.4, align 4
   %add.4 = fadd fast float %4, %add.3
-  %arrayidx.5 = getelementptr inbounds float, float* %x, i64 5
-  %5 = load float, float* %arrayidx.5, align 4
+  %arrayidx.5 = getelementptr inbounds float, ptr %x, i64 5
+  %5 = load float, ptr %arrayidx.5, align 4
   %add.5 = fadd fast float %5, %add.4
-  %arrayidx.6 = getelementptr inbounds float, float* %x, i64 6
-  %6 = load float, float* %arrayidx.6, align 4
+  %arrayidx.6 = getelementptr inbounds float, ptr %x, i64 6
+  %6 = load float, ptr %arrayidx.6, align 4
   %add.6 = fadd fast float %6, %add.5
-  %arrayidx.7 = getelementptr inbounds float, float* %x, i64 7
-  %7 = load float, float* %arrayidx.7, align 4
+  %arrayidx.7 = getelementptr inbounds float, ptr %x, i64 7
+  %7 = load float, ptr %arrayidx.7, align 4
   %add.7 = fadd fast float %7, %add.6
-  %arrayidx.8 = getelementptr inbounds float, float* %x, i64 8
-  %8 = load float, float* %arrayidx.8, align 4
+  %arrayidx.8 = getelementptr inbounds float, ptr %x, i64 8
+  %8 = load float, ptr %arrayidx.8, align 4
   %add.8 = fadd fast float %8, %add.7
-  %arrayidx.9 = getelementptr inbounds float, float* %x, i64 9
-  %9 = load float, float* %arrayidx.9, align 4
+  %arrayidx.9 = getelementptr inbounds float, ptr %x, i64 9
+  %9 = load float, ptr %arrayidx.9, align 4
   %add.9 = fadd fast float %9, %add.8
-  %arrayidx.10 = getelementptr inbounds float, float* %x, i64 10
-  %10 = load float, float* %arrayidx.10, align 4
+  %arrayidx.10 = getelementptr inbounds float, ptr %x, i64 10
+  %10 = load float, ptr %arrayidx.10, align 4
   %add.10 = fadd fast float %10, %add.9
-  %arrayidx.11 = getelementptr inbounds float, float* %x, i64 11
-  %11 = load float, float* %arrayidx.11, align 4
+  %arrayidx.11 = getelementptr inbounds float, ptr %x, i64 11
+  %11 = load float, ptr %arrayidx.11, align 4
   %add.11 = fadd fast float %11, %add.10
-  %arrayidx.12 = getelementptr inbounds float, float* %x, i64 12
-  %12 = load float, float* %arrayidx.12, align 4
+  %arrayidx.12 = getelementptr inbounds float, ptr %x, i64 12
+  %12 = load float, ptr %arrayidx.12, align 4
   %add.12 = fadd fast float %12, %add.11
-  %arrayidx.13 = getelementptr inbounds float, float* %x, i64 13
-  %13 = load float, float* %arrayidx.13, align 4
+  %arrayidx.13 = getelementptr inbounds float, ptr %x, i64 13
+  %13 = load float, ptr %arrayidx.13, align 4
   %add.13 = fadd fast float %13, %add.12
-  %arrayidx.14 = getelementptr inbounds float, float* %x, i64 14
-  %14 = load float, float* %arrayidx.14, align 4
+  %arrayidx.14 = getelementptr inbounds float, ptr %x, i64 14
+  %14 = load float, ptr %arrayidx.14, align 4
   %add.14 = fadd fast float %14, %add.13
-  %arrayidx.15 = getelementptr inbounds float, float* %x, i64 15
-  %15 = load float, float* %arrayidx.15, align 4
+  %arrayidx.15 = getelementptr inbounds float, ptr %x, i64 15
+  %15 = load float, ptr %arrayidx.15, align 4
   %add.15 = fadd fast float %15, %add.14
-  %arrayidx.16 = getelementptr inbounds float, float* %x, i64 16
-  %16 = load float, float* %arrayidx.16, align 4
+  %arrayidx.16 = getelementptr inbounds float, ptr %x, i64 16
+  %16 = load float, ptr %arrayidx.16, align 4
   %add.16 = fadd fast float %16, %add.15
-  %arrayidx.17 = getelementptr inbounds float, float* %x, i64 17
-  %17 = load float, float* %arrayidx.17, align 4
+  %arrayidx.17 = getelementptr inbounds float, ptr %x, i64 17
+  %17 = load float, ptr %arrayidx.17, align 4
   %add.17 = fadd fast float %17, %add.16
-  %arrayidx.18 = getelementptr inbounds float, float* %x, i64 18
-  %18 = load float, float* %arrayidx.18, align 4
+  %arrayidx.18 = getelementptr inbounds float, ptr %x, i64 18
+  %18 = load float, ptr %arrayidx.18, align 4
   %add.18 = fadd fast float %18, %add.17
-  %arrayidx.19 = getelementptr inbounds float, float* %x, i64 19
-  %19 = load float, float* %arrayidx.19, align 4
+  %arrayidx.19 = getelementptr inbounds float, ptr %x, i64 19
+  %19 = load float, ptr %arrayidx.19, align 4
   %add.19 = fadd fast float %19, %add.18
-  %arrayidx.20 = getelementptr inbounds float, float* %x, i64 20
-  %20 = load float, float* %arrayidx.20, align 4
+  %arrayidx.20 = getelementptr inbounds float, ptr %x, i64 20
+  %20 = load float, ptr %arrayidx.20, align 4
   %add.20 = fadd fast float %20, %add.19
-  %arrayidx.21 = getelementptr inbounds float, float* %x, i64 21
-  %21 = load float, float* %arrayidx.21, align 4
+  %arrayidx.21 = getelementptr inbounds float, ptr %x, i64 21
+  %21 = load float, ptr %arrayidx.21, align 4
   %add.21 = fadd fast float %21, %add.20
-  %arrayidx.22 = getelementptr inbounds float, float* %x, i64 22
-  %22 = load float, float* %arrayidx.22, align 4
+  %arrayidx.22 = getelementptr inbounds float, ptr %x, i64 22
+  %22 = load float, ptr %arrayidx.22, align 4
   %add.22 = fadd fast float %22, %add.21
-  %arrayidx.23 = getelementptr inbounds float, float* %x, i64 23
-  %23 = load float, float* %arrayidx.23, align 4
+  %arrayidx.23 = getelementptr inbounds float, ptr %x, i64 23
+  %23 = load float, ptr %arrayidx.23, align 4
   %add.23 = fadd fast float %23, %add.22
-  %arrayidx.24 = getelementptr inbounds float, float* %x, i64 24
-  %24 = load float, float* %arrayidx.24, align 4
+  %arrayidx.24 = getelementptr inbounds float, ptr %x, i64 24
+  %24 = load float, ptr %arrayidx.24, align 4
   %add.24 = fadd fast float %24, %add.23
-  %arrayidx.25 = getelementptr inbounds float, float* %x, i64 25
-  %25 = load float, float* %arrayidx.25, align 4
+  %arrayidx.25 = getelementptr inbounds float, ptr %x, i64 25
+  %25 = load float, ptr %arrayidx.25, align 4
   %add.25 = fadd fast float %25, %add.24
-  %arrayidx.26 = getelementptr inbounds float, float* %x, i64 26
-  %26 = load float, float* %arrayidx.26, align 4
+  %arrayidx.26 = getelementptr inbounds float, ptr %x, i64 26
+  %26 = load float, ptr %arrayidx.26, align 4
   %add.26 = fadd fast float %26, %add.25
-  %arrayidx.27 = getelementptr inbounds float, float* %x, i64 27
-  %27 = load float, float* %arrayidx.27, align 4
+  %arrayidx.27 = getelementptr inbounds float, ptr %x, i64 27
+  %27 = load float, ptr %arrayidx.27, align 4
   %add.27 = fadd fast float %27, %add.26
-  %arrayidx.28 = getelementptr inbounds float, float* %x, i64 28
-  %28 = load float, float* %arrayidx.28, align 4
+  %arrayidx.28 = getelementptr inbounds float, ptr %x, i64 28
+  %28 = load float, ptr %arrayidx.28, align 4
   %add.28 = fadd fast float %28, %add.27
-  %arrayidx.29 = getelementptr inbounds float, float* %x, i64 29
-  %29 = load float, float* %arrayidx.29, align 4
+  %arrayidx.29 = getelementptr inbounds float, ptr %x, i64 29
+  %29 = load float, ptr %arrayidx.29, align 4
   %add.29 = fadd fast float %29, %add.28
-  %arrayidx.30 = getelementptr inbounds float, float* %x, i64 30
-  %30 = load float, float* %arrayidx.30, align 4
+  %arrayidx.30 = getelementptr inbounds float, ptr %x, i64 30
+  %30 = load float, ptr %arrayidx.30, align 4
   %add.30 = fadd fast float %30, %add.29
-  %arrayidx.31 = getelementptr inbounds float, float* %x, i64 31
-  %31 = load float, float* %arrayidx.31, align 4
+  %arrayidx.31 = getelementptr inbounds float, ptr %x, i64 31
+  %31 = load float, ptr %arrayidx.31, align 4
   %add.31 = fadd fast float %31, %add.30
   ret float %add.31
 }
 
-define float @loadadd31(float* nocapture readonly %x) {
+define float @loadadd31(ptr nocapture readonly %x) {
 ; CHECK-LABEL: @loadadd31(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[ARRAYIDX]] to <16 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4
-; CHECK-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <8 x float>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x float>, <8 x float>* [[TMP2]], align 4
-; CHECK-NEXT:    [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 25
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_24]] to <4 x float>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4
-; CHECK-NEXT:    [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 29
-; CHECK-NEXT:    [[TMP6:%.*]] = load float, float* [[ARRAYIDX_28]], align 4
-; CHECK-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
-; CHECK-NEXT:    [[TMP7:%.*]] = load float, float* [[ARRAYIDX_29]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x float>, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, ptr [[X]], i64 17
+; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr [[ARRAYIDX_16]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, ptr [[X]], i64 25
+; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX_24]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, ptr [[X]], i64 29
+; CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[ARRAYIDX_28]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, ptr [[X]], i64 30
+; CHECK-NEXT:    [[TMP7:%.*]] = load float, ptr [[ARRAYIDX_29]], align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP1]])
 ; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP3]])
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[TMP9]]
@@ -632,19 +623,16 @@ define float @loadadd31(float* nocapture readonly %x) {
 ;
 ; THRESHOLD-LABEL: @loadadd31(
 ; THRESHOLD-NEXT:  entry:
-; THRESHOLD-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
-; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[ARRAYIDX]] to <16 x float>*
-; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4
-; THRESHOLD-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
-; THRESHOLD-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <8 x float>*
-; THRESHOLD-NEXT:    [[TMP3:%.*]] = load <8 x float>, <8 x float>* [[TMP2]], align 4
-; THRESHOLD-NEXT:    [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 25
-; THRESHOLD-NEXT:    [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_24]] to <4 x float>*
-; THRESHOLD-NEXT:    [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4
-; THRESHOLD-NEXT:    [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 29
-; THRESHOLD-NEXT:    [[TMP6:%.*]] = load float, float* [[ARRAYIDX_28]], align 4
-; THRESHOLD-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
-; THRESHOLD-NEXT:    [[TMP7:%.*]] = load float, float* [[ARRAYIDX_29]], align 4
+; THRESHOLD-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 1
+; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <16 x float>, ptr [[ARRAYIDX]], align 4
+; THRESHOLD-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, ptr [[X]], i64 17
+; THRESHOLD-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr [[ARRAYIDX_16]], align 4
+; THRESHOLD-NEXT:    [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, ptr [[X]], i64 25
+; THRESHOLD-NEXT:    [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX_24]], align 4
+; THRESHOLD-NEXT:    [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, ptr [[X]], i64 29
+; THRESHOLD-NEXT:    [[TMP6:%.*]] = load float, ptr [[ARRAYIDX_28]], align 4
+; THRESHOLD-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, ptr [[X]], i64 30
+; THRESHOLD-NEXT:    [[TMP7:%.*]] = load float, ptr [[ARRAYIDX_29]], align 4
 ; THRESHOLD-NEXT:    [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP1]])
 ; THRESHOLD-NEXT:    [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP3]])
 ; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[TMP9]]
@@ -655,105 +643,104 @@ define float @loadadd31(float* nocapture readonly %x) {
 ; THRESHOLD-NEXT:    ret float [[OP_RDX3]]
 ;
   entry:
-  %arrayidx = getelementptr inbounds float, float* %x, i64 1
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx.1 = getelementptr inbounds float, float* %x, i64 2
-  %1 = load float, float* %arrayidx.1, align 4
+  %arrayidx = getelementptr inbounds float, ptr %x, i64 1
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 2
+  %1 = load float, ptr %arrayidx.1, align 4
   %add.1 = fadd fast float %1, %0
-  %arrayidx.2 = getelementptr inbounds float, float* %x, i64 3
-  %2 = load float, float* %arrayidx.2, align 4
+  %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 3
+  %2 = load float, ptr %arrayidx.2, align 4
   %add.2 = fadd fast float %2, %add.1
-  %arrayidx.3 = getelementptr inbounds float, float* %x, i64 4
-  %3 = load float, float* %arrayidx.3, align 4
+  %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 4
+  %3 = load float, ptr %arrayidx.3, align 4
   %add.3 = fadd fast float %3, %add.2
-  %arrayidx.4 = getelementptr inbounds float, float* %x, i64 5
-  %4 = load float, float* %arrayidx.4, align 4
+  %arrayidx.4 = getelementptr inbounds float, ptr %x, i64 5
+  %4 = load float, ptr %arrayidx.4, align 4
   %add.4 = fadd fast float %4, %add.3
-  %arrayidx.5 = getelementptr inbounds float, float* %x, i64 6
-  %5 = load float, float* %arrayidx.5, align 4
+  %arrayidx.5 = getelementptr inbounds float, ptr %x, i64 6
+  %5 = load float, ptr %arrayidx.5, align 4
   %add.5 = fadd fast float %5, %add.4
-  %arrayidx.6 = getelementptr inbounds float, float* %x, i64 7
-  %6 = load float, float* %arrayidx.6, align 4
+  %arrayidx.6 = getelementptr inbounds float, ptr %x, i64 7
+  %6 = load float, ptr %arrayidx.6, align 4
   %add.6 = fadd fast float %6, %add.5
-  %arrayidx.7 = getelementptr inbounds float, float* %x, i64 8
-  %7 = load float, float* %arrayidx.7, align 4
+  %arrayidx.7 = getelementptr inbounds float, ptr %x, i64 8
+  %7 = load float, ptr %arrayidx.7, align 4
   %add.7 = fadd fast float %7, %add.6
-  %arrayidx.8 = getelementptr inbounds float, float* %x, i64 9
-  %8 = load float, float* %arrayidx.8, align 4
+  %arrayidx.8 = getelementptr inbounds float, ptr %x, i64 9
+  %8 = load float, ptr %arrayidx.8, align 4
   %add.8 = fadd fast float %8, %add.7
-  %arrayidx.9 = getelementptr inbounds float, float* %x, i64 10
-  %9 = load float, float* %arrayidx.9, align 4
+  %arrayidx.9 = getelementptr inbounds float, ptr %x, i64 10
+  %9 = load float, ptr %arrayidx.9, align 4
   %add.9 = fadd fast float %9, %add.8
-  %arrayidx.10 = getelementptr inbounds float, float* %x, i64 11
-  %10 = load float, float* %arrayidx.10, align 4
+  %arrayidx.10 = getelementptr inbounds float, ptr %x, i64 11
+  %10 = load float, ptr %arrayidx.10, align 4
   %add.10 = fadd fast float %10, %add.9
-  %arrayidx.11 = getelementptr inbounds float, float* %x, i64 12
-  %11 = load float, float* %arrayidx.11, align 4
+  %arrayidx.11 = getelementptr inbounds float, ptr %x, i64 12
+  %11 = load float, ptr %arrayidx.11, align 4
   %add.11 = fadd fast float %11, %add.10
-  %arrayidx.12 = getelementptr inbounds float, float* %x, i64 13
-  %12 = load float, float* %arrayidx.12, align 4
+  %arrayidx.12 = getelementptr inbounds float, ptr %x, i64 13
+  %12 = load float, ptr %arrayidx.12, align 4
   %add.12 = fadd fast float %12, %add.11
-  %arrayidx.13 = getelementptr inbounds float, float* %x, i64 14
-  %13 = load float, float* %arrayidx.13, align 4
+  %arrayidx.13 = getelementptr inbounds float, ptr %x, i64 14
+  %13 = load float, ptr %arrayidx.13, align 4
   %add.13 = fadd fast float %13, %add.12
-  %arrayidx.14 = getelementptr inbounds float, float* %x, i64 15
-  %14 = load float, float* %arrayidx.14, align 4
+  %arrayidx.14 = getelementptr inbounds float, ptr %x, i64 15
+  %14 = load float, ptr %arrayidx.14, align 4
   %add.14 = fadd fast float %14, %add.13
-  %arrayidx.15 = getelementptr inbounds float, float* %x, i64 16
-  %15 = load float, float* %arrayidx.15, align 4
+  %arrayidx.15 = getelementptr inbounds float, ptr %x, i64 16
+  %15 = load float, ptr %arrayidx.15, align 4
   %add.15 = fadd fast float %15, %add.14
-  %arrayidx.16 = getelementptr inbounds float, float* %x, i64 17
-  %16 = load float, float* %arrayidx.16, align 4
+  %arrayidx.16 = getelementptr inbounds float, ptr %x, i64 17
+  %16 = load float, ptr %arrayidx.16, align 4
   %add.16 = fadd fast float %16, %add.15
-  %arrayidx.17 = getelementptr inbounds float, float* %x, i64 18
-  %17 = load float, float* %arrayidx.17, align 4
+  %arrayidx.17 = getelementptr inbounds float, ptr %x, i64 18
+  %17 = load float, ptr %arrayidx.17, align 4
   %add.17 = fadd fast float %17, %add.16
-  %arrayidx.18 = getelementptr inbounds float, float* %x, i64 19
-  %18 = load float, float* %arrayidx.18, align 4
+  %arrayidx.18 = getelementptr inbounds float, ptr %x, i64 19
+  %18 = load float, ptr %arrayidx.18, align 4
   %add.18 = fadd fast float %18, %add.17
-  %arrayidx.19 = getelementptr inbounds float, float* %x, i64 20
-  %19 = load float, float* %arrayidx.19, align 4
+  %arrayidx.19 = getelementptr inbounds float, ptr %x, i64 20
+  %19 = load float, ptr %arrayidx.19, align 4
   %add.19 = fadd fast float %19, %add.18
-  %arrayidx.20 = getelementptr inbounds float, float* %x, i64 21
-  %20 = load float, float* %arrayidx.20, align 4
+  %arrayidx.20 = getelementptr inbounds float, ptr %x, i64 21
+  %20 = load float, ptr %arrayidx.20, align 4
   %add.20 = fadd fast float %20, %add.19
-  %arrayidx.21 = getelementptr inbounds float, float* %x, i64 22
-  %21 = load float, float* %arrayidx.21, align 4
+  %arrayidx.21 = getelementptr inbounds float, ptr %x, i64 22
+  %21 = load float, ptr %arrayidx.21, align 4
   %add.21 = fadd fast float %21, %add.20
-  %arrayidx.22 = getelementptr inbounds float, float* %x, i64 23
-  %22 = load float, float* %arrayidx.22, align 4
+  %arrayidx.22 = getelementptr inbounds float, ptr %x, i64 23
+  %22 = load float, ptr %arrayidx.22, align 4
   %add.22 = fadd fast float %22, %add.21
-  %arrayidx.23 = getelementptr inbounds float, float* %x, i64 24
-  %23 = load float, float* %arrayidx.23, align 4
+  %arrayidx.23 = getelementptr inbounds float, ptr %x, i64 24
+  %23 = load float, ptr %arrayidx.23, align 4
   %add.23 = fadd fast float %23, %add.22
-  %arrayidx.24 = getelementptr inbounds float, float* %x, i64 25
-  %24 = load float, float* %arrayidx.24, align 4
+  %arrayidx.24 = getelementptr inbounds float, ptr %x, i64 25
+  %24 = load float, ptr %arrayidx.24, align 4
   %add.24 = fadd fast float %24, %add.23
-  %arrayidx.25 = getelementptr inbounds float, float* %x, i64 26
-  %25 = load float, float* %arrayidx.25, align 4
+  %arrayidx.25 = getelementptr inbounds float, ptr %x, i64 26
+  %25 = load float, ptr %arrayidx.25, align 4
   %add.25 = fadd fast float %25, %add.24
-  %arrayidx.26 = getelementptr inbounds float, float* %x, i64 27
-  %26 = load float, float* %arrayidx.26, align 4
+  %arrayidx.26 = getelementptr inbounds float, ptr %x, i64 27
+  %26 = load float, ptr %arrayidx.26, align 4
   %add.26 = fadd fast float %26, %add.25
-  %arrayidx.27 = getelementptr inbounds float, float* %x, i64 28
-  %27 = load float, float* %arrayidx.27, align 4
+  %arrayidx.27 = getelementptr inbounds float, ptr %x, i64 28
+  %27 = load float, ptr %arrayidx.27, align 4
   %add.27 = fadd fast float %27, %add.26
-  %arrayidx.28 = getelementptr inbounds float, float* %x, i64 29
-  %28 = load float, float* %arrayidx.28, align 4
+  %arrayidx.28 = getelementptr inbounds float, ptr %x, i64 29
+  %28 = load float, ptr %arrayidx.28, align 4
   %add.28 = fadd fast float %28, %add.27
-  %arrayidx.29 = getelementptr inbounds float, float* %x, i64 30
-  %29 = load float, float* %arrayidx.29, align 4
+  %arrayidx.29 = getelementptr inbounds float, ptr %x, i64 30
+  %29 = load float, ptr %arrayidx.29, align 4
   %add.29 = fadd fast float %29, %add.28
   ret float %add.29
 }
 
-define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) {
+define float @extra_args(ptr nocapture readonly %x, i32 %a, i32 %b) {
 ; CHECK-LABEL: @extra_args(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
 ; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <8 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]])
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[CONV]]
 ; CHECK-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[CONV]], 3.000000e+00
@@ -764,8 +751,7 @@ define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) {
 ; THRESHOLD-NEXT:  entry:
 ; THRESHOLD-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
 ; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
-; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <8 x float>*
-; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
+; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4
 ; THRESHOLD-NEXT:    [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]])
 ; THRESHOLD-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> <float poison, float 3.000000e+00>, float [[TMP2]], i32 0
 ; THRESHOLD-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> poison, float [[CONV]], i32 0
@@ -779,41 +765,40 @@ define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) {
   entry:
   %mul = mul nsw i32 %b, %a
   %conv = sitofp i32 %mul to float
-  %0 = load float, float* %x, align 4
+  %0 = load float, ptr %x, align 4
   %add = fadd fast float %conv, 3.000000e+00
   %add1 = fadd fast float %0, %add
-  %arrayidx3 = getelementptr inbounds float, float* %x, i64 1
-  %1 = load float, float* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds float, ptr %x, i64 1
+  %1 = load float, ptr %arrayidx3, align 4
   %add4 = fadd fast float %1, %add1
   %add5 = fadd fast float %add4, %conv
-  %arrayidx3.1 = getelementptr inbounds float, float* %x, i64 2
-  %2 = load float, float* %arrayidx3.1, align 4
+  %arrayidx3.1 = getelementptr inbounds float, ptr %x, i64 2
+  %2 = load float, ptr %arrayidx3.1, align 4
   %add4.1 = fadd fast float %2, %add5
-  %arrayidx3.2 = getelementptr inbounds float, float* %x, i64 3
-  %3 = load float, float* %arrayidx3.2, align 4
+  %arrayidx3.2 = getelementptr inbounds float, ptr %x, i64 3
+  %3 = load float, ptr %arrayidx3.2, align 4
   %add4.2 = fadd fast float %3, %add4.1
-  %arrayidx3.3 = getelementptr inbounds float, float* %x, i64 4
-  %4 = load float, float* %arrayidx3.3, align 4
+  %arrayidx3.3 = getelementptr inbounds float, ptr %x, i64 4
+  %4 = load float, ptr %arrayidx3.3, align 4
   %add4.3 = fadd fast float %4, %add4.2
-  %arrayidx3.4 = getelementptr inbounds float, float* %x, i64 5
-  %5 = load float, float* %arrayidx3.4, align 4
+  %arrayidx3.4 = getelementptr inbounds float, ptr %x, i64 5
+  %5 = load float, ptr %arrayidx3.4, align 4
   %add4.4 = fadd fast float %5, %add4.3
-  %arrayidx3.5 = getelementptr inbounds float, float* %x, i64 6
-  %6 = load float, float* %arrayidx3.5, align 4
+  %arrayidx3.5 = getelementptr inbounds float, ptr %x, i64 6
+  %6 = load float, ptr %arrayidx3.5, align 4
   %add4.5 = fadd fast float %6, %add4.4
-  %arrayidx3.6 = getelementptr inbounds float, float* %x, i64 7
-  %7 = load float, float* %arrayidx3.6, align 4
+  %arrayidx3.6 = getelementptr inbounds float, ptr %x, i64 7
+  %7 = load float, ptr %arrayidx3.6, align 4
   %add4.6 = fadd fast float %7, %add4.5
   ret float %add4.6
 }
 
-define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a, i32 %b) {
+define float @extra_args_same_several_times(ptr nocapture readonly %x, i32 %a, i32 %b) {
 ; CHECK-LABEL: @extra_args_same_several_times(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
 ; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <8 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]])
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP2]], 5.000000e+00
 ; CHECK-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[CONV]], [[CONV]]
@@ -825,8 +810,7 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a
 ; THRESHOLD-NEXT:  entry:
 ; THRESHOLD-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
 ; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
-; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <8 x float>*
-; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
+; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4
 ; THRESHOLD-NEXT:    [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]])
 ; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP2]], 5.000000e+00
 ; THRESHOLD-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> poison, float [[OP_RDX]], i32 0
@@ -841,44 +825,43 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a
   entry:
   %mul = mul nsw i32 %b, %a
   %conv = sitofp i32 %mul to float
-  %0 = load float, float* %x, align 4
+  %0 = load float, ptr %x, align 4
   %add = fadd fast float %conv, 3.000000e+00
   %add1 = fadd fast float %0, %add
-  %arrayidx3 = getelementptr inbounds float, float* %x, i64 1
-  %1 = load float, float* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds float, ptr %x, i64 1
+  %1 = load float, ptr %arrayidx3, align 4
   %add4 = fadd fast float %1, %add1
   %add41 = fadd fast float %add4, 5.000000e+00
   %add5 = fadd fast float %add41, %conv
-  %arrayidx3.1 = getelementptr inbounds float, float* %x, i64 2
-  %2 = load float, float* %arrayidx3.1, align 4
+  %arrayidx3.1 = getelementptr inbounds float, ptr %x, i64 2
+  %2 = load float, ptr %arrayidx3.1, align 4
   %add4.1 = fadd fast float %2, %add5
   %add4.11 = fadd fast float %add4.1, 5.000000e+00
-  %arrayidx3.2 = getelementptr inbounds float, float* %x, i64 3
-  %3 = load float, float* %arrayidx3.2, align 4
+  %arrayidx3.2 = getelementptr inbounds float, ptr %x, i64 3
+  %3 = load float, ptr %arrayidx3.2, align 4
   %add4.2 = fadd fast float %3, %add4.11
-  %arrayidx3.3 = getelementptr inbounds float, float* %x, i64 4
-  %4 = load float, float* %arrayidx3.3, align 4
+  %arrayidx3.3 = getelementptr inbounds float, ptr %x, i64 4
+  %4 = load float, ptr %arrayidx3.3, align 4
   %add4.3 = fadd fast float %4, %add4.2
-  %arrayidx3.4 = getelementptr inbounds float, float* %x, i64 5
-  %5 = load float, float* %arrayidx3.4, align 4
+  %arrayidx3.4 = getelementptr inbounds float, ptr %x, i64 5
+  %5 = load float, ptr %arrayidx3.4, align 4
   %add4.4 = fadd fast float %5, %add4.3
-  %arrayidx3.5 = getelementptr inbounds float, float* %x, i64 6
-  %6 = load float, float* %arrayidx3.5, align 4
+  %arrayidx3.5 = getelementptr inbounds float, ptr %x, i64 6
+  %6 = load float, ptr %arrayidx3.5, align 4
   %add4.5 = fadd fast float %6, %add4.4
-  %arrayidx3.6 = getelementptr inbounds float, float* %x, i64 7
-  %7 = load float, float* %arrayidx3.6, align 4
+  %arrayidx3.6 = getelementptr inbounds float, ptr %x, i64 7
+  %7 = load float, ptr %arrayidx3.6, align 4
   %add4.6 = fadd fast float %7, %add4.5
   ret float %add4.6
 }
 
-define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b, i32 %c) {
+define float @extra_args_no_replace(ptr nocapture readonly %x, i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: @extra_args_no_replace(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
 ; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
 ; CHECK-NEXT:    [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <8 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]])
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[CONV]]
 ; CHECK-NEXT:    [[OP_RDX1:%.*]] = fadd fast float [[CONV]], [[CONVC]]
@@ -891,8 +874,7 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b
 ; THRESHOLD-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
 ; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
 ; THRESHOLD-NEXT:    [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float
-; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X:%.*]] to <8 x float>*
-; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
+; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr [[X:%.*]], align 4
 ; THRESHOLD-NEXT:    [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]])
 ; THRESHOLD-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
 ; THRESHOLD-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[CONVC]], i32 1
@@ -908,47 +890,47 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b
   entry:
   %mul = mul nsw i32 %b, %a
   %conv = sitofp i32 %mul to float
-  %0 = load float, float* %x, align 4
+  %0 = load float, ptr %x, align 4
   %convc = sitofp i32 %c to float
   %addc = fadd fast float %convc, 3.000000e+00
   %add = fadd fast float %conv, %addc
   %add1 = fadd fast float %0, %add
-  %arrayidx3 = getelementptr inbounds float, float* %x, i64 1
-  %1 = load float, float* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds float, ptr %x, i64 1
+  %1 = load float, ptr %arrayidx3, align 4
   %add4 = fadd fast float %1, %add1
-  %arrayidx3.1 = getelementptr inbounds float, float* %x, i64 2
-  %2 = load float, float* %arrayidx3.1, align 4
+  %arrayidx3.1 = getelementptr inbounds float, ptr %x, i64 2
+  %2 = load float, ptr %arrayidx3.1, align 4
   %add4.1 = fadd fast float %2, %add4
-  %arrayidx3.2 = getelementptr inbounds float, float* %x, i64 3
-  %3 = load float, float* %arrayidx3.2, align 4
+  %arrayidx3.2 = getelementptr inbounds float, ptr %x, i64 3
+  %3 = load float, ptr %arrayidx3.2, align 4
   %add4.2 = fadd fast float %3, %add4.1
-  %arrayidx3.3 = getelementptr inbounds float, float* %x, i64 4
-  %4 = load float, float* %arrayidx3.3, align 4
+  %arrayidx3.3 = getelementptr inbounds float, ptr %x, i64 4
+  %4 = load float, ptr %arrayidx3.3, align 4
   %add4.3 = fadd fast float %4, %add4.2
   %add5 = fadd fast float %add4.3, %conv
-  %arrayidx3.4 = getelementptr inbounds float, float* %x, i64 5
-  %5 = load float, float* %arrayidx3.4, align 4
+  %arrayidx3.4 = getelementptr inbounds float, ptr %x, i64 5
+  %5 = load float, ptr %arrayidx3.4, align 4
   %add4.4 = fadd fast float %5, %add5
-  %arrayidx3.5 = getelementptr inbounds float, float* %x, i64 6
-  %6 = load float, float* %arrayidx3.5, align 4
+  %arrayidx3.5 = getelementptr inbounds float, ptr %x, i64 6
+  %6 = load float, ptr %arrayidx3.5, align 4
   %add4.5 = fadd fast float %6, %add4.4
-  %arrayidx3.6 = getelementptr inbounds float, float* %x, i64 7
-  %7 = load float, float* %arrayidx3.6, align 4
+  %arrayidx3.6 = getelementptr inbounds float, ptr %x, i64 7
+  %7 = load float, ptr %arrayidx3.6, align 4
   %add4.6 = fadd fast float %7, %add4.5
   ret float %add4.6
 }
 
-define float @extra_args_no_fast(float* %x, float %a, float %b) {
+define float @extra_args_no_fast(ptr %x, float %a, float %b) {
 ; CHECK-LABEL: @extra_args_no_fast(
 ; CHECK-NEXT:    [[ADDC:%.*]] = fadd fast float [[B:%.*]], 3.000000e+00
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[A:%.*]], [[ADDC]]
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
-; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
-; CHECK-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
-; CHECK-NEXT:    [[T0:%.*]] = load float, float* [[X]], align 4
-; CHECK-NEXT:    [[T1:%.*]] = load float, float* [[ARRAYIDX3]], align 4
-; CHECK-NEXT:    [[T2:%.*]] = load float, float* [[ARRAYIDX3_1]], align 4
-; CHECK-NEXT:    [[T3:%.*]] = load float, float* [[ARRAYIDX3_2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
+; CHECK-NEXT:    [[T0:%.*]] = load float, ptr [[X]], align 4
+; CHECK-NEXT:    [[T1:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[T2:%.*]] = load float, ptr [[ARRAYIDX3_1]], align 4
+; CHECK-NEXT:    [[T3:%.*]] = load float, ptr [[ARRAYIDX3_2]], align 4
 ; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float [[T0]], [[ADD]]
 ; CHECK-NEXT:    [[ADD4:%.*]] = fadd fast float [[T1]], [[ADD1]]
 ; CHECK-NEXT:    [[ADD4_1:%.*]] = fadd float [[T2]], [[ADD4]]
@@ -959,13 +941,13 @@ define float @extra_args_no_fast(float* %x, float %a, float %b) {
 ; THRESHOLD-LABEL: @extra_args_no_fast(
 ; THRESHOLD-NEXT:    [[ADDC:%.*]] = fadd fast float [[B:%.*]], 3.000000e+00
 ; THRESHOLD-NEXT:    [[ADD:%.*]] = fadd fast float [[A:%.*]], [[ADDC]]
-; THRESHOLD-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
-; THRESHOLD-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
-; THRESHOLD-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
-; THRESHOLD-NEXT:    [[T0:%.*]] = load float, float* [[X]], align 4
-; THRESHOLD-NEXT:    [[T1:%.*]] = load float, float* [[ARRAYIDX3]], align 4
-; THRESHOLD-NEXT:    [[T2:%.*]] = load float, float* [[ARRAYIDX3_1]], align 4
-; THRESHOLD-NEXT:    [[T3:%.*]] = load float, float* [[ARRAYIDX3_2]], align 4
+; THRESHOLD-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 1
+; THRESHOLD-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
+; THRESHOLD-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
+; THRESHOLD-NEXT:    [[T0:%.*]] = load float, ptr [[X]], align 4
+; THRESHOLD-NEXT:    [[T1:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
+; THRESHOLD-NEXT:    [[T2:%.*]] = load float, ptr [[ARRAYIDX3_1]], align 4
+; THRESHOLD-NEXT:    [[T3:%.*]] = load float, ptr [[ARRAYIDX3_2]], align 4
 ; THRESHOLD-NEXT:    [[ADD1:%.*]] = fadd fast float [[T0]], [[ADD]]
 ; THRESHOLD-NEXT:    [[ADD4:%.*]] = fadd fast float [[T1]], [[ADD1]]
 ; THRESHOLD-NEXT:    [[ADD4_1:%.*]] = fadd float [[T2]], [[ADD4]]
@@ -975,13 +957,13 @@ define float @extra_args_no_fast(float* %x, float %a, float %b) {
 ;
   %addc = fadd fast float %b, 3.0
   %add = fadd fast float %a, %addc
-  %arrayidx3 = getelementptr inbounds float, float* %x, i64 1
-  %arrayidx3.1 = getelementptr inbounds float, float* %x, i64 2
-  %arrayidx3.2 = getelementptr inbounds float, float* %x, i64 3
-  %t0 = load float, float* %x, align 4
-  %t1 = load float, float* %arrayidx3, align 4
-  %t2 = load float, float* %arrayidx3.1, align 4
-  %t3 = load float, float* %arrayidx3.2, align 4
+  %arrayidx3 = getelementptr inbounds float, ptr %x, i64 1
+  %arrayidx3.1 = getelementptr inbounds float, ptr %x, i64 2
+  %arrayidx3.2 = getelementptr inbounds float, ptr %x, i64 3
+  %t0 = load float, ptr %x, align 4
+  %t1 = load float, ptr %arrayidx3, align 4
+  %t2 = load float, ptr %arrayidx3.1, align 4
+  %t3 = load float, ptr %arrayidx3.2, align 4
   %add1 = fadd fast float %t0, %add
   %add4 = fadd fast float %t1, %add1
   %add4.1 = fadd float %t2, %add4  ; this is not a reduction candidate

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
index dce22f886da17..4f865fcdbeac5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
@@ -6,7 +6,7 @@
 
 @arr = local_unnamed_addr global [32 x i32] zeroinitializer, align 16
 @arr1 = local_unnamed_addr global [32 x float] zeroinitializer, align 16
-@arrp = local_unnamed_addr global [32 x i32*] zeroinitializer, align 16
+@arrp = local_unnamed_addr global [32 x ptr] zeroinitializer, align 16
 @var = global i32 zeroinitializer, align 8
 
 declare i32 @llvm.smax.i32(i32, i32)
@@ -18,65 +18,65 @@ declare i32 @llvm.umin.i32(i32, i32)
 
 define i32 @maxi8(i32) {
 ; SSE-LABEL: @maxi8(
-; SSE-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
-; SSE-NEXT:    [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
+; SSE-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
 ; SSE-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
-; SSE-NEXT:    [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
+; SSE-NEXT:    [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
 ; SSE-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
-; SSE-NEXT:    [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
+; SSE-NEXT:    [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
 ; SSE-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
 ; SSE-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
-; SSE-NEXT:    [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
+; SSE-NEXT:    [[TMP12:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
 ; SSE-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
 ; SSE-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
-; SSE-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
+; SSE-NEXT:    [[TMP15:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
 ; SSE-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
 ; SSE-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
-; SSE-NEXT:    [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
+; SSE-NEXT:    [[TMP18:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
 ; SSE-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
 ; SSE-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
-; SSE-NEXT:    [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+; SSE-NEXT:    [[TMP21:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
 ; SSE-NEXT:    [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
 ; SSE-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]]
 ; SSE-NEXT:    ret i32 [[TMP23]]
 ;
 ; AVX-LABEL: @maxi8(
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @arr, align 16
 ; AVX-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]])
 ; AVX-NEXT:    ret i32 [[TMP3]]
 ;
 ; AVX2-LABEL: @maxi8(
-; AVX2-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16
+; AVX2-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @arr, align 16
 ; AVX2-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]])
 ; AVX2-NEXT:    ret i32 [[TMP3]]
 ;
 ; THRESH-LABEL: @maxi8(
-; THRESH-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16
+; THRESH-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @arr, align 16
 ; THRESH-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]])
 ; THRESH-NEXT:    ret i32 [[TMP3]]
 ;
-  %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
-  %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
+  %2 = load i32, ptr @arr, align 16
+  %3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
   %4 = icmp sgt i32 %2, %3
   %5 = select i1 %4, i32 %2, i32 %3
-  %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
+  %6 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
   %7 = icmp sgt i32 %5, %6
   %8 = select i1 %7, i32 %5, i32 %6
-  %9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
+  %9 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
   %10 = icmp sgt i32 %8, %9
   %11 = select i1 %10, i32 %8, i32 %9
-  %12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
+  %12 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
   %13 = icmp sgt i32 %11, %12
   %14 = select i1 %13, i32 %11, i32 %12
-  %15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
+  %15 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
   %16 = icmp sgt i32 %14, %15
   %17 = select i1 %16, i32 %14, i32 %15
-  %18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
+  %18 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
   %19 = icmp sgt i32 %17, %18
   %20 = select i1 %19, i32 %17, i32 %18
-  %21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+  %21 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
   %22 = icmp sgt i32 %20, %21
   %23 = select i1 %22, i32 %20, i32 %21
   ret i32 %23
@@ -84,70 +84,70 @@ define i32 @maxi8(i32) {
 
 define i32 @maxi8_store_in(i32) {
 ; SSE-LABEL: @maxi8_store_in(
-; SSE-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
-; SSE-NEXT:    [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
+; SSE-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
 ; SSE-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
-; SSE-NEXT:    [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
+; SSE-NEXT:    [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
 ; SSE-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
-; SSE-NEXT:    [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
+; SSE-NEXT:    [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
 ; SSE-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
 ; SSE-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
-; SSE-NEXT:    [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
+; SSE-NEXT:    [[TMP12:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
 ; SSE-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
 ; SSE-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
-; SSE-NEXT:    store i32 0, i32* @var, align 8
-; SSE-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
+; SSE-NEXT:    store i32 0, ptr @var, align 8
+; SSE-NEXT:    [[TMP15:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
 ; SSE-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
 ; SSE-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
-; SSE-NEXT:    [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
+; SSE-NEXT:    [[TMP18:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
 ; SSE-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
 ; SSE-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
-; SSE-NEXT:    [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+; SSE-NEXT:    [[TMP21:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
 ; SSE-NEXT:    [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
 ; SSE-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]]
 ; SSE-NEXT:    ret i32 [[TMP23]]
 ;
 ; AVX-LABEL: @maxi8_store_in(
-; AVX-NEXT:    store i32 0, i32* @var, align 8
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16
+; AVX-NEXT:    store i32 0, ptr @var, align 8
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @arr, align 16
 ; AVX-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]])
 ; AVX-NEXT:    ret i32 [[TMP3]]
 ;
 ; AVX2-LABEL: @maxi8_store_in(
-; AVX2-NEXT:    store i32 0, i32* @var, align 8
-; AVX2-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16
+; AVX2-NEXT:    store i32 0, ptr @var, align 8
+; AVX2-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @arr, align 16
 ; AVX2-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]])
 ; AVX2-NEXT:    ret i32 [[TMP3]]
 ;
 ; THRESH-LABEL: @maxi8_store_in(
-; THRESH-NEXT:    store i32 0, i32* @var, align 8
-; THRESH-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16
+; THRESH-NEXT:    store i32 0, ptr @var, align 8
+; THRESH-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @arr, align 16
 ; THRESH-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]])
 ; THRESH-NEXT:    ret i32 [[TMP3]]
 ;
-  %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
-  %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
+  %2 = load i32, ptr @arr, align 16
+  %3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
   %4 = icmp sgt i32 %2, %3
   %5 = select i1 %4, i32 %2, i32 %3
-  %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
+  %6 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
   %7 = icmp sgt i32 %5, %6
   %8 = select i1 %7, i32 %5, i32 %6
-  %9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
+  %9 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
   %10 = icmp sgt i32 %8, %9
   %11 = select i1 %10, i32 %8, i32 %9
-  %12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
+  %12 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
   %13 = icmp sgt i32 %11, %12
   %14 = select i1 %13, i32 %11, i32 %12
-  store i32 0, i32* @var, align 8
-  %15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
+  store i32 0, ptr @var, align 8
+  %15 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
   %16 = icmp sgt i32 %14, %15
   %17 = select i1 %16, i32 %14, i32 %15
-  %18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
+  %18 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
   %19 = icmp sgt i32 %17, %18
   %20 = select i1 %19, i32 %17, i32 %18
-  %21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+  %21 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
   %22 = icmp sgt i32 %20, %21
   %23 = select i1 %22, i32 %20, i32 %21
   ret i32 %23
@@ -155,54 +155,54 @@ define i32 @maxi8_store_in(i32) {
 
 define i32 @maxi16(i32) {
 ; CHECK-LABEL: @maxi16(
-; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr to <16 x i32>*), align 16
+; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @arr, align 16
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> [[TMP2]])
 ; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
-  %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
-  %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
+  %2 = load i32, ptr @arr, align 16
+  %3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
   %4 = icmp sgt i32 %2, %3
   %5 = select i1 %4, i32 %2, i32 %3
-  %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
+  %6 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
   %7 = icmp sgt i32 %5, %6
   %8 = select i1 %7, i32 %5, i32 %6
-  %9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
+  %9 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
   %10 = icmp sgt i32 %8, %9
   %11 = select i1 %10, i32 %8, i32 %9
-  %12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
+  %12 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
   %13 = icmp sgt i32 %11, %12
   %14 = select i1 %13, i32 %11, i32 %12
-  %15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
+  %15 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
   %16 = icmp sgt i32 %14, %15
   %17 = select i1 %16, i32 %14, i32 %15
-  %18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
+  %18 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
   %19 = icmp sgt i32 %17, %18
   %20 = select i1 %19, i32 %17, i32 %18
-  %21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+  %21 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
   %22 = icmp sgt i32 %20, %21
   %23 = select i1 %22, i32 %20, i32 %21
-  %24 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 8), align 16
+  %24 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 8), align 16
   %25 = icmp sgt i32 %23, %24
   %26 = select i1 %25, i32 %23, i32 %24
-  %27 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 9), align 4
+  %27 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 9), align 4
   %28 = icmp sgt i32 %26, %27
   %29 = select i1 %28, i32 %26, i32 %27
-  %30 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 10), align 8
+  %30 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 10), align 8
   %31 = icmp sgt i32 %29, %30
   %32 = select i1 %31, i32 %29, i32 %30
-  %33 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 11), align 4
+  %33 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 11), align 4
   %34 = icmp sgt i32 %32, %33
   %35 = select i1 %34, i32 %32, i32 %33
-  %36 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 12), align 16
+  %36 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 12), align 16
   %37 = icmp sgt i32 %35, %36
   %38 = select i1 %37, i32 %35, i32 %36
-  %39 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 13), align 4
+  %39 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 13), align 4
   %40 = icmp sgt i32 %38, %39
   %41 = select i1 %40, i32 %38, i32 %39
-  %42 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 14), align 8
+  %42 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 14), align 8
   %43 = icmp sgt i32 %41, %42
   %44 = select i1 %43, i32 %41, i32 %42
-  %45 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 15), align 4
+  %45 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 15), align 4
   %46 = icmp sgt i32 %44, %45
   %47 = select i1 %46, i32 %44, i32 %45
   ret i32 %47
@@ -210,102 +210,102 @@ define i32 @maxi16(i32) {
 
 define i32 @maxi32(i32) {
 ; CHECK-LABEL: @maxi32(
-; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr to <32 x i32>*), align 16
+; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x i32>, ptr @arr, align 16
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> [[TMP2]])
 ; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
-  %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
-  %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
+  %2 = load i32, ptr @arr, align 16
+  %3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
   %4 = icmp sgt i32 %2, %3
   %5 = select i1 %4, i32 %2, i32 %3
-  %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
+  %6 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
   %7 = icmp sgt i32 %5, %6
   %8 = select i1 %7, i32 %5, i32 %6
-  %9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
+  %9 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
   %10 = icmp sgt i32 %8, %9
   %11 = select i1 %10, i32 %8, i32 %9
-  %12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
+  %12 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
   %13 = icmp sgt i32 %11, %12
   %14 = select i1 %13, i32 %11, i32 %12
-  %15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
+  %15 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
   %16 = icmp sgt i32 %14, %15
   %17 = select i1 %16, i32 %14, i32 %15
-  %18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
+  %18 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
   %19 = icmp sgt i32 %17, %18
   %20 = select i1 %19, i32 %17, i32 %18
-  %21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+  %21 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
   %22 = icmp sgt i32 %20, %21
   %23 = select i1 %22, i32 %20, i32 %21
-  %24 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 8), align 16
+  %24 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 8), align 16
   %25 = icmp sgt i32 %23, %24
   %26 = select i1 %25, i32 %23, i32 %24
-  %27 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 9), align 4
+  %27 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 9), align 4
   %28 = icmp sgt i32 %26, %27
   %29 = select i1 %28, i32 %26, i32 %27
-  %30 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 10), align 8
+  %30 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 10), align 8
   %31 = icmp sgt i32 %29, %30
   %32 = select i1 %31, i32 %29, i32 %30
-  %33 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 11), align 4
+  %33 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 11), align 4
   %34 = icmp sgt i32 %32, %33
   %35 = select i1 %34, i32 %32, i32 %33
-  %36 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 12), align 16
+  %36 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 12), align 16
   %37 = icmp sgt i32 %35, %36
   %38 = select i1 %37, i32 %35, i32 %36
-  %39 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 13), align 4
+  %39 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 13), align 4
   %40 = icmp sgt i32 %38, %39
   %41 = select i1 %40, i32 %38, i32 %39
-  %42 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 14), align 8
+  %42 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 14), align 8
   %43 = icmp sgt i32 %41, %42
   %44 = select i1 %43, i32 %41, i32 %42
-  %45 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 15), align 4
+  %45 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 15), align 4
   %46 = icmp sgt i32 %44, %45
   %47 = select i1 %46, i32 %44, i32 %45
-  %48 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 16), align 16
+  %48 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 16), align 16
   %49 = icmp sgt i32 %47, %48
   %50 = select i1 %49, i32 %47, i32 %48
-  %51 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 17), align 4
+  %51 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 17), align 4
   %52 = icmp sgt i32 %50, %51
   %53 = select i1 %52, i32 %50, i32 %51
-  %54 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 18), align 8
+  %54 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 18), align 8
   %55 = icmp sgt i32 %53, %54
   %56 = select i1 %55, i32 %53, i32 %54
-  %57 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 19), align 4
+  %57 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 19), align 4
   %58 = icmp sgt i32 %56, %57
   %59 = select i1 %58, i32 %56, i32 %57
-  %60 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 20), align 16
+  %60 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 20), align 16
   %61 = icmp sgt i32 %59, %60
   %62 = select i1 %61, i32 %59, i32 %60
-  %63 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 21), align 4
+  %63 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 21), align 4
   %64 = icmp sgt i32 %62, %63
   %65 = select i1 %64, i32 %62, i32 %63
-  %66 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 22), align 8
+  %66 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 22), align 8
   %67 = icmp sgt i32 %65, %66
   %68 = select i1 %67, i32 %65, i32 %66
-  %69 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 23), align 4
+  %69 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 23), align 4
   %70 = icmp sgt i32 %68, %69
   %71 = select i1 %70, i32 %68, i32 %69
-  %72 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 24), align 16
+  %72 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 24), align 16
   %73 = icmp sgt i32 %71, %72
   %74 = select i1 %73, i32 %71, i32 %72
-  %75 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 25), align 4
+  %75 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 25), align 4
   %76 = icmp sgt i32 %74, %75
   %77 = select i1 %76, i32 %74, i32 %75
-  %78 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 26), align 8
+  %78 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 26), align 8
   %79 = icmp sgt i32 %77, %78
   %80 = select i1 %79, i32 %77, i32 %78
-  %81 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 27), align 4
+  %81 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 27), align 4
   %82 = icmp sgt i32 %80, %81
   %83 = select i1 %82, i32 %80, i32 %81
-  %84 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 28), align 16
+  %84 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 28), align 16
   %85 = icmp sgt i32 %83, %84
   %86 = select i1 %85, i32 %83, i32 %84
-  %87 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 29), align 4
+  %87 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 29), align 4
   %88 = icmp sgt i32 %86, %87
   %89 = select i1 %88, i32 %86, i32 %87
-  %90 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 30), align 8
+  %90 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 30), align 8
   %91 = icmp sgt i32 %89, %90
   %92 = select i1 %91, i32 %89, i32 %90
-  %93 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 31), align 4
+  %93 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 31), align 4
   %94 = icmp sgt i32 %92, %93
   %95 = select i1 %94, i32 %92, i32 %93
   ret i32 %95
@@ -315,76 +315,76 @@ define i32 @maxi32(i32) {
 
 define float @maxf8(float) {
 ; DEFAULT-LABEL: @maxf8(
-; DEFAULT-NEXT:    [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
-; DEFAULT-NEXT:    [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
+; DEFAULT-NEXT:    [[TMP2:%.*]] = load float, ptr @arr1, align 16
+; DEFAULT-NEXT:    [[TMP3:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 1), align 4
 ; DEFAULT-NEXT:    [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]]
 ; DEFAULT-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]]
-; DEFAULT-NEXT:    [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
+; DEFAULT-NEXT:    [[TMP6:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
 ; DEFAULT-NEXT:    [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]]
 ; DEFAULT-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]]
-; DEFAULT-NEXT:    [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
+; DEFAULT-NEXT:    [[TMP9:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
 ; DEFAULT-NEXT:    [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]]
 ; DEFAULT-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]]
-; DEFAULT-NEXT:    [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
+; DEFAULT-NEXT:    [[TMP12:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
 ; DEFAULT-NEXT:    [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]]
 ; DEFAULT-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]]
-; DEFAULT-NEXT:    [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
+; DEFAULT-NEXT:    [[TMP15:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
 ; DEFAULT-NEXT:    [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]]
 ; DEFAULT-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]]
-; DEFAULT-NEXT:    [[TMP18:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
+; DEFAULT-NEXT:    [[TMP18:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
 ; DEFAULT-NEXT:    [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]]
 ; DEFAULT-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]]
-; DEFAULT-NEXT:    [[TMP21:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
+; DEFAULT-NEXT:    [[TMP21:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
 ; DEFAULT-NEXT:    [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]]
 ; DEFAULT-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]]
 ; DEFAULT-NEXT:    ret float [[TMP23]]
 ;
 ; THRESH-LABEL: @maxf8(
-; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([32 x float]* @arr1 to <2 x float>*), align 16
+; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr @arr1, align 16
 ; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
 ; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
 ; THRESH-NEXT:    [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
 ; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP4]]
-; THRESH-NEXT:    [[TMP7:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
+; THRESH-NEXT:    [[TMP7:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
 ; THRESH-NEXT:    [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]]
 ; THRESH-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float [[TMP7]]
-; THRESH-NEXT:    [[TMP10:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
+; THRESH-NEXT:    [[TMP10:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
 ; THRESH-NEXT:    [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]]
 ; THRESH-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float [[TMP10]]
-; THRESH-NEXT:    [[TMP13:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
+; THRESH-NEXT:    [[TMP13:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
 ; THRESH-NEXT:    [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]]
 ; THRESH-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float [[TMP13]]
-; THRESH-NEXT:    [[TMP16:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
+; THRESH-NEXT:    [[TMP16:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
 ; THRESH-NEXT:    [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]]
 ; THRESH-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float [[TMP16]]
-; THRESH-NEXT:    [[TMP19:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
+; THRESH-NEXT:    [[TMP19:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
 ; THRESH-NEXT:    [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]]
 ; THRESH-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float [[TMP19]]
-; THRESH-NEXT:    [[TMP22:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
+; THRESH-NEXT:    [[TMP22:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
 ; THRESH-NEXT:    [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]]
 ; THRESH-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float [[TMP22]]
 ; THRESH-NEXT:    ret float [[TMP24]]
 ;
-  %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
-  %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
+  %2 = load float, ptr @arr1, align 16
+  %3 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 1), align 4
   %4 = fcmp fast ogt float %2, %3
   %5 = select i1 %4, float %2, float %3
-  %6 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
+  %6 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
   %7 = fcmp fast ogt float %5, %6
   %8 = select i1 %7, float %5, float %6
-  %9 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
+  %9 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
   %10 = fcmp fast ogt float %8, %9
   %11 = select i1 %10, float %8, float %9
-  %12 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
+  %12 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
   %13 = fcmp fast ogt float %11, %12
   %14 = select i1 %13, float %11, float %12
-  %15 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
+  %15 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
   %16 = fcmp fast ogt float %14, %15
   %17 = select i1 %16, float %14, float %15
-  %18 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
+  %18 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
   %19 = fcmp fast ogt float %17, %18
   %20 = select i1 %19, float %17, float %18
-  %21 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
+  %21 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
   %22 = fcmp fast ogt float %20, %21
   %23 = select i1 %22, float %20, float %21
   ret float %23
@@ -394,148 +394,148 @@ define float @maxf8(float) {
 
 define float @maxf16(float) {
 ; DEFAULT-LABEL: @maxf16(
-; DEFAULT-NEXT:    [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
-; DEFAULT-NEXT:    [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
+; DEFAULT-NEXT:    [[TMP2:%.*]] = load float, ptr @arr1, align 16
+; DEFAULT-NEXT:    [[TMP3:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 1), align 4
 ; DEFAULT-NEXT:    [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]]
 ; DEFAULT-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]]
-; DEFAULT-NEXT:    [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
+; DEFAULT-NEXT:    [[TMP6:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
 ; DEFAULT-NEXT:    [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]]
 ; DEFAULT-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]]
-; DEFAULT-NEXT:    [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
+; DEFAULT-NEXT:    [[TMP9:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
 ; DEFAULT-NEXT:    [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]]
 ; DEFAULT-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]]
-; DEFAULT-NEXT:    [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
+; DEFAULT-NEXT:    [[TMP12:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
 ; DEFAULT-NEXT:    [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]]
 ; DEFAULT-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]]
-; DEFAULT-NEXT:    [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
+; DEFAULT-NEXT:    [[TMP15:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
 ; DEFAULT-NEXT:    [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]]
 ; DEFAULT-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]]
-; DEFAULT-NEXT:    [[TMP18:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
+; DEFAULT-NEXT:    [[TMP18:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
 ; DEFAULT-NEXT:    [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]]
 ; DEFAULT-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]]
-; DEFAULT-NEXT:    [[TMP21:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
+; DEFAULT-NEXT:    [[TMP21:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
 ; DEFAULT-NEXT:    [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]]
 ; DEFAULT-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]]
-; DEFAULT-NEXT:    [[TMP24:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16
+; DEFAULT-NEXT:    [[TMP24:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 8), align 16
 ; DEFAULT-NEXT:    [[TMP25:%.*]] = fcmp fast ogt float [[TMP23]], [[TMP24]]
 ; DEFAULT-NEXT:    [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP23]], float [[TMP24]]
-; DEFAULT-NEXT:    [[TMP27:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4
+; DEFAULT-NEXT:    [[TMP27:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 9), align 4
 ; DEFAULT-NEXT:    [[TMP28:%.*]] = fcmp fast ogt float [[TMP26]], [[TMP27]]
 ; DEFAULT-NEXT:    [[TMP29:%.*]] = select i1 [[TMP28]], float [[TMP26]], float [[TMP27]]
-; DEFAULT-NEXT:    [[TMP30:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8
+; DEFAULT-NEXT:    [[TMP30:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 10), align 8
 ; DEFAULT-NEXT:    [[TMP31:%.*]] = fcmp fast ogt float [[TMP29]], [[TMP30]]
 ; DEFAULT-NEXT:    [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP29]], float [[TMP30]]
-; DEFAULT-NEXT:    [[TMP33:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4
+; DEFAULT-NEXT:    [[TMP33:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 11), align 4
 ; DEFAULT-NEXT:    [[TMP34:%.*]] = fcmp fast ogt float [[TMP32]], [[TMP33]]
 ; DEFAULT-NEXT:    [[TMP35:%.*]] = select i1 [[TMP34]], float [[TMP32]], float [[TMP33]]
-; DEFAULT-NEXT:    [[TMP36:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16
+; DEFAULT-NEXT:    [[TMP36:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 12), align 16
 ; DEFAULT-NEXT:    [[TMP37:%.*]] = fcmp fast ogt float [[TMP35]], [[TMP36]]
 ; DEFAULT-NEXT:    [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP35]], float [[TMP36]]
-; DEFAULT-NEXT:    [[TMP39:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4
+; DEFAULT-NEXT:    [[TMP39:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 13), align 4
 ; DEFAULT-NEXT:    [[TMP40:%.*]] = fcmp fast ogt float [[TMP38]], [[TMP39]]
 ; DEFAULT-NEXT:    [[TMP41:%.*]] = select i1 [[TMP40]], float [[TMP38]], float [[TMP39]]
-; DEFAULT-NEXT:    [[TMP42:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8
+; DEFAULT-NEXT:    [[TMP42:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 14), align 8
 ; DEFAULT-NEXT:    [[TMP43:%.*]] = fcmp fast ogt float [[TMP41]], [[TMP42]]
 ; DEFAULT-NEXT:    [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP41]], float [[TMP42]]
-; DEFAULT-NEXT:    [[TMP45:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4
+; DEFAULT-NEXT:    [[TMP45:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 15), align 4
 ; DEFAULT-NEXT:    [[TMP46:%.*]] = fcmp fast ogt float [[TMP44]], [[TMP45]]
 ; DEFAULT-NEXT:    [[TMP47:%.*]] = select i1 [[TMP46]], float [[TMP44]], float [[TMP45]]
 ; DEFAULT-NEXT:    ret float [[TMP47]]
 ;
 ; THRESH-LABEL: @maxf16(
-; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([32 x float]* @arr1 to <2 x float>*), align 16
+; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr @arr1, align 16
 ; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
 ; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
 ; THRESH-NEXT:    [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
 ; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP4]]
-; THRESH-NEXT:    [[TMP7:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
+; THRESH-NEXT:    [[TMP7:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
 ; THRESH-NEXT:    [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]]
 ; THRESH-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float [[TMP7]]
-; THRESH-NEXT:    [[TMP10:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
+; THRESH-NEXT:    [[TMP10:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
 ; THRESH-NEXT:    [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]]
 ; THRESH-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float [[TMP10]]
-; THRESH-NEXT:    [[TMP13:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
+; THRESH-NEXT:    [[TMP13:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
 ; THRESH-NEXT:    [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]]
 ; THRESH-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float [[TMP13]]
-; THRESH-NEXT:    [[TMP16:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
+; THRESH-NEXT:    [[TMP16:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
 ; THRESH-NEXT:    [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]]
 ; THRESH-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float [[TMP16]]
-; THRESH-NEXT:    [[TMP19:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
+; THRESH-NEXT:    [[TMP19:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
 ; THRESH-NEXT:    [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]]
 ; THRESH-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float [[TMP19]]
-; THRESH-NEXT:    [[TMP22:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
+; THRESH-NEXT:    [[TMP22:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
 ; THRESH-NEXT:    [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]]
 ; THRESH-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float [[TMP22]]
-; THRESH-NEXT:    [[TMP25:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16
+; THRESH-NEXT:    [[TMP25:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 8), align 16
 ; THRESH-NEXT:    [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]]
 ; THRESH-NEXT:    [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float [[TMP25]]
-; THRESH-NEXT:    [[TMP28:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4
+; THRESH-NEXT:    [[TMP28:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 9), align 4
 ; THRESH-NEXT:    [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]]
 ; THRESH-NEXT:    [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float [[TMP28]]
-; THRESH-NEXT:    [[TMP31:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8
+; THRESH-NEXT:    [[TMP31:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 10), align 8
 ; THRESH-NEXT:    [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]]
 ; THRESH-NEXT:    [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float [[TMP31]]
-; THRESH-NEXT:    [[TMP34:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4
+; THRESH-NEXT:    [[TMP34:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 11), align 4
 ; THRESH-NEXT:    [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]]
 ; THRESH-NEXT:    [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float [[TMP34]]
-; THRESH-NEXT:    [[TMP37:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16
+; THRESH-NEXT:    [[TMP37:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 12), align 16
 ; THRESH-NEXT:    [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]]
 ; THRESH-NEXT:    [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float [[TMP37]]
-; THRESH-NEXT:    [[TMP40:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4
+; THRESH-NEXT:    [[TMP40:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 13), align 4
 ; THRESH-NEXT:    [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]]
 ; THRESH-NEXT:    [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float [[TMP40]]
-; THRESH-NEXT:    [[TMP43:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8
+; THRESH-NEXT:    [[TMP43:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 14), align 8
 ; THRESH-NEXT:    [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]]
 ; THRESH-NEXT:    [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float [[TMP43]]
-; THRESH-NEXT:    [[TMP46:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4
+; THRESH-NEXT:    [[TMP46:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 15), align 4
 ; THRESH-NEXT:    [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]]
 ; THRESH-NEXT:    [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP45]], float [[TMP46]]
 ; THRESH-NEXT:    ret float [[TMP48]]
 ;
-  %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
-  %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
+  %2 = load float, ptr @arr1, align 16
+  %3 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 1), align 4
   %4 = fcmp fast ogt float %2, %3
   %5 = select i1 %4, float %2, float %3
-  %6 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
+  %6 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
   %7 = fcmp fast ogt float %5, %6
   %8 = select i1 %7, float %5, float %6
-  %9 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
+  %9 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
   %10 = fcmp fast ogt float %8, %9
   %11 = select i1 %10, float %8, float %9
-  %12 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
+  %12 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
   %13 = fcmp fast ogt float %11, %12
   %14 = select i1 %13, float %11, float %12
-  %15 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
+  %15 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
   %16 = fcmp fast ogt float %14, %15
   %17 = select i1 %16, float %14, float %15
-  %18 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
+  %18 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
   %19 = fcmp fast ogt float %17, %18
   %20 = select i1 %19, float %17, float %18
-  %21 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
+  %21 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
   %22 = fcmp fast ogt float %20, %21
   %23 = select i1 %22, float %20, float %21
-  %24 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16
+  %24 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 8), align 16
   %25 = fcmp fast ogt float %23, %24
   %26 = select i1 %25, float %23, float %24
-  %27 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4
+  %27 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 9), align 4
   %28 = fcmp fast ogt float %26, %27
   %29 = select i1 %28, float %26, float %27
-  %30 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8
+  %30 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 10), align 8
   %31 = fcmp fast ogt float %29, %30
   %32 = select i1 %31, float %29, float %30
-  %33 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4
+  %33 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 11), align 4
   %34 = fcmp fast ogt float %32, %33
   %35 = select i1 %34, float %32, float %33
-  %36 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16
+  %36 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 12), align 16
   %37 = fcmp fast ogt float %35, %36
   %38 = select i1 %37, float %35, float %36
-  %39 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4
+  %39 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 13), align 4
   %40 = fcmp fast ogt float %38, %39
   %41 = select i1 %40, float %38, float %39
-  %42 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8
+  %42 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 14), align 8
   %43 = fcmp fast ogt float %41, %42
   %44 = select i1 %43, float %41, float %42
-  %45 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4
+  %45 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 15), align 4
   %46 = fcmp fast ogt float %44, %45
   %47 = select i1 %46, float %44, float %45
   ret float %47
@@ -545,292 +545,292 @@ define float @maxf16(float) {
 
 define float @maxf32(float) {
 ; DEFAULT-LABEL: @maxf32(
-; DEFAULT-NEXT:    [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
-; DEFAULT-NEXT:    [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
+; DEFAULT-NEXT:    [[TMP2:%.*]] = load float, ptr @arr1, align 16
+; DEFAULT-NEXT:    [[TMP3:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 1), align 4
 ; DEFAULT-NEXT:    [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]]
 ; DEFAULT-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]]
-; DEFAULT-NEXT:    [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
+; DEFAULT-NEXT:    [[TMP6:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
 ; DEFAULT-NEXT:    [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]]
 ; DEFAULT-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]]
-; DEFAULT-NEXT:    [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
+; DEFAULT-NEXT:    [[TMP9:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
 ; DEFAULT-NEXT:    [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]]
 ; DEFAULT-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]]
-; DEFAULT-NEXT:    [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
+; DEFAULT-NEXT:    [[TMP12:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
 ; DEFAULT-NEXT:    [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]]
 ; DEFAULT-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]]
-; DEFAULT-NEXT:    [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
+; DEFAULT-NEXT:    [[TMP15:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
 ; DEFAULT-NEXT:    [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]]
 ; DEFAULT-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]]
-; DEFAULT-NEXT:    [[TMP18:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
+; DEFAULT-NEXT:    [[TMP18:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
 ; DEFAULT-NEXT:    [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]]
 ; DEFAULT-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]]
-; DEFAULT-NEXT:    [[TMP21:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
+; DEFAULT-NEXT:    [[TMP21:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
 ; DEFAULT-NEXT:    [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]]
 ; DEFAULT-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]]
-; DEFAULT-NEXT:    [[TMP24:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16
+; DEFAULT-NEXT:    [[TMP24:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 8), align 16
 ; DEFAULT-NEXT:    [[TMP25:%.*]] = fcmp fast ogt float [[TMP23]], [[TMP24]]
 ; DEFAULT-NEXT:    [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP23]], float [[TMP24]]
-; DEFAULT-NEXT:    [[TMP27:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4
+; DEFAULT-NEXT:    [[TMP27:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 9), align 4
 ; DEFAULT-NEXT:    [[TMP28:%.*]] = fcmp fast ogt float [[TMP26]], [[TMP27]]
 ; DEFAULT-NEXT:    [[TMP29:%.*]] = select i1 [[TMP28]], float [[TMP26]], float [[TMP27]]
-; DEFAULT-NEXT:    [[TMP30:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8
+; DEFAULT-NEXT:    [[TMP30:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 10), align 8
 ; DEFAULT-NEXT:    [[TMP31:%.*]] = fcmp fast ogt float [[TMP29]], [[TMP30]]
 ; DEFAULT-NEXT:    [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP29]], float [[TMP30]]
-; DEFAULT-NEXT:    [[TMP33:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4
+; DEFAULT-NEXT:    [[TMP33:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 11), align 4
 ; DEFAULT-NEXT:    [[TMP34:%.*]] = fcmp fast ogt float [[TMP32]], [[TMP33]]
 ; DEFAULT-NEXT:    [[TMP35:%.*]] = select i1 [[TMP34]], float [[TMP32]], float [[TMP33]]
-; DEFAULT-NEXT:    [[TMP36:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16
+; DEFAULT-NEXT:    [[TMP36:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 12), align 16
 ; DEFAULT-NEXT:    [[TMP37:%.*]] = fcmp fast ogt float [[TMP35]], [[TMP36]]
 ; DEFAULT-NEXT:    [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP35]], float [[TMP36]]
-; DEFAULT-NEXT:    [[TMP39:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4
+; DEFAULT-NEXT:    [[TMP39:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 13), align 4
 ; DEFAULT-NEXT:    [[TMP40:%.*]] = fcmp fast ogt float [[TMP38]], [[TMP39]]
 ; DEFAULT-NEXT:    [[TMP41:%.*]] = select i1 [[TMP40]], float [[TMP38]], float [[TMP39]]
-; DEFAULT-NEXT:    [[TMP42:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8
+; DEFAULT-NEXT:    [[TMP42:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 14), align 8
 ; DEFAULT-NEXT:    [[TMP43:%.*]] = fcmp fast ogt float [[TMP41]], [[TMP42]]
 ; DEFAULT-NEXT:    [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP41]], float [[TMP42]]
-; DEFAULT-NEXT:    [[TMP45:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4
+; DEFAULT-NEXT:    [[TMP45:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 15), align 4
 ; DEFAULT-NEXT:    [[TMP46:%.*]] = fcmp fast ogt float [[TMP44]], [[TMP45]]
 ; DEFAULT-NEXT:    [[TMP47:%.*]] = select i1 [[TMP46]], float [[TMP44]], float [[TMP45]]
-; DEFAULT-NEXT:    [[TMP48:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 16), align 16
+; DEFAULT-NEXT:    [[TMP48:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 16), align 16
 ; DEFAULT-NEXT:    [[TMP49:%.*]] = fcmp fast ogt float [[TMP47]], [[TMP48]]
 ; DEFAULT-NEXT:    [[TMP50:%.*]] = select i1 [[TMP49]], float [[TMP47]], float [[TMP48]]
-; DEFAULT-NEXT:    [[TMP51:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 17), align 4
+; DEFAULT-NEXT:    [[TMP51:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 17), align 4
 ; DEFAULT-NEXT:    [[TMP52:%.*]] = fcmp fast ogt float [[TMP50]], [[TMP51]]
 ; DEFAULT-NEXT:    [[TMP53:%.*]] = select i1 [[TMP52]], float [[TMP50]], float [[TMP51]]
-; DEFAULT-NEXT:    [[TMP54:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 18), align 8
+; DEFAULT-NEXT:    [[TMP54:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 18), align 8
 ; DEFAULT-NEXT:    [[TMP55:%.*]] = fcmp fast ogt float [[TMP53]], [[TMP54]]
 ; DEFAULT-NEXT:    [[TMP56:%.*]] = select i1 [[TMP55]], float [[TMP53]], float [[TMP54]]
-; DEFAULT-NEXT:    [[TMP57:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 19), align 4
+; DEFAULT-NEXT:    [[TMP57:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 19), align 4
 ; DEFAULT-NEXT:    [[TMP58:%.*]] = fcmp fast ogt float [[TMP56]], [[TMP57]]
 ; DEFAULT-NEXT:    [[TMP59:%.*]] = select i1 [[TMP58]], float [[TMP56]], float [[TMP57]]
-; DEFAULT-NEXT:    [[TMP60:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 20), align 16
+; DEFAULT-NEXT:    [[TMP60:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 20), align 16
 ; DEFAULT-NEXT:    [[TMP61:%.*]] = fcmp fast ogt float [[TMP59]], [[TMP60]]
 ; DEFAULT-NEXT:    [[TMP62:%.*]] = select i1 [[TMP61]], float [[TMP59]], float [[TMP60]]
-; DEFAULT-NEXT:    [[TMP63:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 21), align 4
+; DEFAULT-NEXT:    [[TMP63:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 21), align 4
 ; DEFAULT-NEXT:    [[TMP64:%.*]] = fcmp fast ogt float [[TMP62]], [[TMP63]]
 ; DEFAULT-NEXT:    [[TMP65:%.*]] = select i1 [[TMP64]], float [[TMP62]], float [[TMP63]]
-; DEFAULT-NEXT:    [[TMP66:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 22), align 8
+; DEFAULT-NEXT:    [[TMP66:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 22), align 8
 ; DEFAULT-NEXT:    [[TMP67:%.*]] = fcmp fast ogt float [[TMP65]], [[TMP66]]
 ; DEFAULT-NEXT:    [[TMP68:%.*]] = select i1 [[TMP67]], float [[TMP65]], float [[TMP66]]
-; DEFAULT-NEXT:    [[TMP69:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 23), align 4
+; DEFAULT-NEXT:    [[TMP69:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 23), align 4
 ; DEFAULT-NEXT:    [[TMP70:%.*]] = fcmp fast ogt float [[TMP68]], [[TMP69]]
 ; DEFAULT-NEXT:    [[TMP71:%.*]] = select i1 [[TMP70]], float [[TMP68]], float [[TMP69]]
-; DEFAULT-NEXT:    [[TMP72:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 24), align 16
+; DEFAULT-NEXT:    [[TMP72:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 24), align 16
 ; DEFAULT-NEXT:    [[TMP73:%.*]] = fcmp fast ogt float [[TMP71]], [[TMP72]]
 ; DEFAULT-NEXT:    [[TMP74:%.*]] = select i1 [[TMP73]], float [[TMP71]], float [[TMP72]]
-; DEFAULT-NEXT:    [[TMP75:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 25), align 4
+; DEFAULT-NEXT:    [[TMP75:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 25), align 4
 ; DEFAULT-NEXT:    [[TMP76:%.*]] = fcmp fast ogt float [[TMP74]], [[TMP75]]
 ; DEFAULT-NEXT:    [[TMP77:%.*]] = select i1 [[TMP76]], float [[TMP74]], float [[TMP75]]
-; DEFAULT-NEXT:    [[TMP78:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 26), align 8
+; DEFAULT-NEXT:    [[TMP78:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 26), align 8
 ; DEFAULT-NEXT:    [[TMP79:%.*]] = fcmp fast ogt float [[TMP77]], [[TMP78]]
 ; DEFAULT-NEXT:    [[TMP80:%.*]] = select i1 [[TMP79]], float [[TMP77]], float [[TMP78]]
-; DEFAULT-NEXT:    [[TMP81:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 27), align 4
+; DEFAULT-NEXT:    [[TMP81:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 27), align 4
 ; DEFAULT-NEXT:    [[TMP82:%.*]] = fcmp fast ogt float [[TMP80]], [[TMP81]]
 ; DEFAULT-NEXT:    [[TMP83:%.*]] = select i1 [[TMP82]], float [[TMP80]], float [[TMP81]]
-; DEFAULT-NEXT:    [[TMP84:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 28), align 16
+; DEFAULT-NEXT:    [[TMP84:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 28), align 16
 ; DEFAULT-NEXT:    [[TMP85:%.*]] = fcmp fast ogt float [[TMP83]], [[TMP84]]
 ; DEFAULT-NEXT:    [[TMP86:%.*]] = select i1 [[TMP85]], float [[TMP83]], float [[TMP84]]
-; DEFAULT-NEXT:    [[TMP87:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 29), align 4
+; DEFAULT-NEXT:    [[TMP87:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 29), align 4
 ; DEFAULT-NEXT:    [[TMP88:%.*]] = fcmp fast ogt float [[TMP86]], [[TMP87]]
 ; DEFAULT-NEXT:    [[TMP89:%.*]] = select i1 [[TMP88]], float [[TMP86]], float [[TMP87]]
-; DEFAULT-NEXT:    [[TMP90:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 30), align 8
+; DEFAULT-NEXT:    [[TMP90:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 30), align 8
 ; DEFAULT-NEXT:    [[TMP91:%.*]] = fcmp fast ogt float [[TMP89]], [[TMP90]]
 ; DEFAULT-NEXT:    [[TMP92:%.*]] = select i1 [[TMP91]], float [[TMP89]], float [[TMP90]]
-; DEFAULT-NEXT:    [[TMP93:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 31), align 4
+; DEFAULT-NEXT:    [[TMP93:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 31), align 4
 ; DEFAULT-NEXT:    [[TMP94:%.*]] = fcmp fast ogt float [[TMP92]], [[TMP93]]
 ; DEFAULT-NEXT:    [[TMP95:%.*]] = select i1 [[TMP94]], float [[TMP92]], float [[TMP93]]
 ; DEFAULT-NEXT:    ret float [[TMP95]]
 ;
 ; THRESH-LABEL: @maxf32(
-; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([32 x float]* @arr1 to <2 x float>*), align 16
+; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr @arr1, align 16
 ; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
 ; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
 ; THRESH-NEXT:    [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
 ; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP4]]
-; THRESH-NEXT:    [[TMP7:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
+; THRESH-NEXT:    [[TMP7:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
 ; THRESH-NEXT:    [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]]
 ; THRESH-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float [[TMP7]]
-; THRESH-NEXT:    [[TMP10:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
+; THRESH-NEXT:    [[TMP10:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
 ; THRESH-NEXT:    [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]]
 ; THRESH-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float [[TMP10]]
-; THRESH-NEXT:    [[TMP13:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
+; THRESH-NEXT:    [[TMP13:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
 ; THRESH-NEXT:    [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]]
 ; THRESH-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float [[TMP13]]
-; THRESH-NEXT:    [[TMP16:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
+; THRESH-NEXT:    [[TMP16:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
 ; THRESH-NEXT:    [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]]
 ; THRESH-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float [[TMP16]]
-; THRESH-NEXT:    [[TMP19:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
+; THRESH-NEXT:    [[TMP19:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
 ; THRESH-NEXT:    [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]]
 ; THRESH-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float [[TMP19]]
-; THRESH-NEXT:    [[TMP22:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
+; THRESH-NEXT:    [[TMP22:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
 ; THRESH-NEXT:    [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]]
 ; THRESH-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float [[TMP22]]
-; THRESH-NEXT:    [[TMP25:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16
+; THRESH-NEXT:    [[TMP25:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 8), align 16
 ; THRESH-NEXT:    [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]]
 ; THRESH-NEXT:    [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float [[TMP25]]
-; THRESH-NEXT:    [[TMP28:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4
+; THRESH-NEXT:    [[TMP28:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 9), align 4
 ; THRESH-NEXT:    [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]]
 ; THRESH-NEXT:    [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float [[TMP28]]
-; THRESH-NEXT:    [[TMP31:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8
+; THRESH-NEXT:    [[TMP31:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 10), align 8
 ; THRESH-NEXT:    [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]]
 ; THRESH-NEXT:    [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float [[TMP31]]
-; THRESH-NEXT:    [[TMP34:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4
+; THRESH-NEXT:    [[TMP34:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 11), align 4
 ; THRESH-NEXT:    [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]]
 ; THRESH-NEXT:    [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float [[TMP34]]
-; THRESH-NEXT:    [[TMP37:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16
+; THRESH-NEXT:    [[TMP37:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 12), align 16
 ; THRESH-NEXT:    [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]]
 ; THRESH-NEXT:    [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float [[TMP37]]
-; THRESH-NEXT:    [[TMP40:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4
+; THRESH-NEXT:    [[TMP40:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 13), align 4
 ; THRESH-NEXT:    [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]]
 ; THRESH-NEXT:    [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float [[TMP40]]
-; THRESH-NEXT:    [[TMP43:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8
+; THRESH-NEXT:    [[TMP43:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 14), align 8
 ; THRESH-NEXT:    [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]]
 ; THRESH-NEXT:    [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float [[TMP43]]
-; THRESH-NEXT:    [[TMP46:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4
+; THRESH-NEXT:    [[TMP46:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 15), align 4
 ; THRESH-NEXT:    [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]]
 ; THRESH-NEXT:    [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP45]], float [[TMP46]]
-; THRESH-NEXT:    [[TMP49:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 16), align 16
+; THRESH-NEXT:    [[TMP49:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 16), align 16
 ; THRESH-NEXT:    [[TMP50:%.*]] = fcmp fast ogt float [[TMP48]], [[TMP49]]
 ; THRESH-NEXT:    [[TMP51:%.*]] = select i1 [[TMP50]], float [[TMP48]], float [[TMP49]]
-; THRESH-NEXT:    [[TMP52:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 17), align 4
+; THRESH-NEXT:    [[TMP52:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 17), align 4
 ; THRESH-NEXT:    [[TMP53:%.*]] = fcmp fast ogt float [[TMP51]], [[TMP52]]
 ; THRESH-NEXT:    [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP51]], float [[TMP52]]
-; THRESH-NEXT:    [[TMP55:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 18), align 8
+; THRESH-NEXT:    [[TMP55:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 18), align 8
 ; THRESH-NEXT:    [[TMP56:%.*]] = fcmp fast ogt float [[TMP54]], [[TMP55]]
 ; THRESH-NEXT:    [[TMP57:%.*]] = select i1 [[TMP56]], float [[TMP54]], float [[TMP55]]
-; THRESH-NEXT:    [[TMP58:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 19), align 4
+; THRESH-NEXT:    [[TMP58:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 19), align 4
 ; THRESH-NEXT:    [[TMP59:%.*]] = fcmp fast ogt float [[TMP57]], [[TMP58]]
 ; THRESH-NEXT:    [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP57]], float [[TMP58]]
-; THRESH-NEXT:    [[TMP61:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 20), align 16
+; THRESH-NEXT:    [[TMP61:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 20), align 16
 ; THRESH-NEXT:    [[TMP62:%.*]] = fcmp fast ogt float [[TMP60]], [[TMP61]]
 ; THRESH-NEXT:    [[TMP63:%.*]] = select i1 [[TMP62]], float [[TMP60]], float [[TMP61]]
-; THRESH-NEXT:    [[TMP64:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 21), align 4
+; THRESH-NEXT:    [[TMP64:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 21), align 4
 ; THRESH-NEXT:    [[TMP65:%.*]] = fcmp fast ogt float [[TMP63]], [[TMP64]]
 ; THRESH-NEXT:    [[TMP66:%.*]] = select i1 [[TMP65]], float [[TMP63]], float [[TMP64]]
-; THRESH-NEXT:    [[TMP67:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 22), align 8
+; THRESH-NEXT:    [[TMP67:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 22), align 8
 ; THRESH-NEXT:    [[TMP68:%.*]] = fcmp fast ogt float [[TMP66]], [[TMP67]]
 ; THRESH-NEXT:    [[TMP69:%.*]] = select i1 [[TMP68]], float [[TMP66]], float [[TMP67]]
-; THRESH-NEXT:    [[TMP70:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 23), align 4
+; THRESH-NEXT:    [[TMP70:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 23), align 4
 ; THRESH-NEXT:    [[TMP71:%.*]] = fcmp fast ogt float [[TMP69]], [[TMP70]]
 ; THRESH-NEXT:    [[TMP72:%.*]] = select i1 [[TMP71]], float [[TMP69]], float [[TMP70]]
-; THRESH-NEXT:    [[TMP73:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 24), align 16
+; THRESH-NEXT:    [[TMP73:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 24), align 16
 ; THRESH-NEXT:    [[TMP74:%.*]] = fcmp fast ogt float [[TMP72]], [[TMP73]]
 ; THRESH-NEXT:    [[TMP75:%.*]] = select i1 [[TMP74]], float [[TMP72]], float [[TMP73]]
-; THRESH-NEXT:    [[TMP76:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 25), align 4
+; THRESH-NEXT:    [[TMP76:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 25), align 4
 ; THRESH-NEXT:    [[TMP77:%.*]] = fcmp fast ogt float [[TMP75]], [[TMP76]]
 ; THRESH-NEXT:    [[TMP78:%.*]] = select i1 [[TMP77]], float [[TMP75]], float [[TMP76]]
-; THRESH-NEXT:    [[TMP79:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 26), align 8
+; THRESH-NEXT:    [[TMP79:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 26), align 8
 ; THRESH-NEXT:    [[TMP80:%.*]] = fcmp fast ogt float [[TMP78]], [[TMP79]]
 ; THRESH-NEXT:    [[TMP81:%.*]] = select i1 [[TMP80]], float [[TMP78]], float [[TMP79]]
-; THRESH-NEXT:    [[TMP82:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 27), align 4
+; THRESH-NEXT:    [[TMP82:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 27), align 4
 ; THRESH-NEXT:    [[TMP83:%.*]] = fcmp fast ogt float [[TMP81]], [[TMP82]]
 ; THRESH-NEXT:    [[TMP84:%.*]] = select i1 [[TMP83]], float [[TMP81]], float [[TMP82]]
-; THRESH-NEXT:    [[TMP85:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 28), align 16
+; THRESH-NEXT:    [[TMP85:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 28), align 16
 ; THRESH-NEXT:    [[TMP86:%.*]] = fcmp fast ogt float [[TMP84]], [[TMP85]]
 ; THRESH-NEXT:    [[TMP87:%.*]] = select i1 [[TMP86]], float [[TMP84]], float [[TMP85]]
-; THRESH-NEXT:    [[TMP88:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 29), align 4
+; THRESH-NEXT:    [[TMP88:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 29), align 4
 ; THRESH-NEXT:    [[TMP89:%.*]] = fcmp fast ogt float [[TMP87]], [[TMP88]]
 ; THRESH-NEXT:    [[TMP90:%.*]] = select i1 [[TMP89]], float [[TMP87]], float [[TMP88]]
-; THRESH-NEXT:    [[TMP91:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 30), align 8
+; THRESH-NEXT:    [[TMP91:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 30), align 8
 ; THRESH-NEXT:    [[TMP92:%.*]] = fcmp fast ogt float [[TMP90]], [[TMP91]]
 ; THRESH-NEXT:    [[TMP93:%.*]] = select i1 [[TMP92]], float [[TMP90]], float [[TMP91]]
-; THRESH-NEXT:    [[TMP94:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 31), align 4
+; THRESH-NEXT:    [[TMP94:%.*]] = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 31), align 4
 ; THRESH-NEXT:    [[TMP95:%.*]] = fcmp fast ogt float [[TMP93]], [[TMP94]]
 ; THRESH-NEXT:    [[TMP96:%.*]] = select i1 [[TMP95]], float [[TMP93]], float [[TMP94]]
 ; THRESH-NEXT:    ret float [[TMP96]]
 ;
-  %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
-  %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
+  %2 = load float, ptr @arr1, align 16
+  %3 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 1), align 4
   %4 = fcmp fast ogt float %2, %3
   %5 = select i1 %4, float %2, float %3
-  %6 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
+  %6 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 2), align 8
   %7 = fcmp fast ogt float %5, %6
   %8 = select i1 %7, float %5, float %6
-  %9 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
+  %9 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 3), align 4
   %10 = fcmp fast ogt float %8, %9
   %11 = select i1 %10, float %8, float %9
-  %12 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
+  %12 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 4), align 16
   %13 = fcmp fast ogt float %11, %12
   %14 = select i1 %13, float %11, float %12
-  %15 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
+  %15 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 5), align 4
   %16 = fcmp fast ogt float %14, %15
   %17 = select i1 %16, float %14, float %15
-  %18 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
+  %18 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 6), align 8
   %19 = fcmp fast ogt float %17, %18
   %20 = select i1 %19, float %17, float %18
-  %21 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
+  %21 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 7), align 4
   %22 = fcmp fast ogt float %20, %21
   %23 = select i1 %22, float %20, float %21
-  %24 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16
+  %24 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 8), align 16
   %25 = fcmp fast ogt float %23, %24
   %26 = select i1 %25, float %23, float %24
-  %27 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4
+  %27 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 9), align 4
   %28 = fcmp fast ogt float %26, %27
   %29 = select i1 %28, float %26, float %27
-  %30 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8
+  %30 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 10), align 8
   %31 = fcmp fast ogt float %29, %30
   %32 = select i1 %31, float %29, float %30
-  %33 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4
+  %33 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 11), align 4
   %34 = fcmp fast ogt float %32, %33
   %35 = select i1 %34, float %32, float %33
-  %36 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16
+  %36 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 12), align 16
   %37 = fcmp fast ogt float %35, %36
   %38 = select i1 %37, float %35, float %36
-  %39 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4
+  %39 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 13), align 4
   %40 = fcmp fast ogt float %38, %39
   %41 = select i1 %40, float %38, float %39
-  %42 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8
+  %42 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 14), align 8
   %43 = fcmp fast ogt float %41, %42
   %44 = select i1 %43, float %41, float %42
-  %45 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4
+  %45 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 15), align 4
   %46 = fcmp fast ogt float %44, %45
   %47 = select i1 %46, float %44, float %45
-  %48 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 16), align 16
+  %48 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 16), align 16
   %49 = fcmp fast ogt float %47, %48
   %50 = select i1 %49, float %47, float %48
-  %51 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 17), align 4
+  %51 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 17), align 4
   %52 = fcmp fast ogt float %50, %51
   %53 = select i1 %52, float %50, float %51
-  %54 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 18), align 8
+  %54 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 18), align 8
   %55 = fcmp fast ogt float %53, %54
   %56 = select i1 %55, float %53, float %54
-  %57 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 19), align 4
+  %57 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 19), align 4
   %58 = fcmp fast ogt float %56, %57
   %59 = select i1 %58, float %56, float %57
-  %60 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 20), align 16
+  %60 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 20), align 16
   %61 = fcmp fast ogt float %59, %60
   %62 = select i1 %61, float %59, float %60
-  %63 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 21), align 4
+  %63 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 21), align 4
   %64 = fcmp fast ogt float %62, %63
   %65 = select i1 %64, float %62, float %63
-  %66 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 22), align 8
+  %66 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 22), align 8
   %67 = fcmp fast ogt float %65, %66
   %68 = select i1 %67, float %65, float %66
-  %69 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 23), align 4
+  %69 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 23), align 4
   %70 = fcmp fast ogt float %68, %69
   %71 = select i1 %70, float %68, float %69
-  %72 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 24), align 16
+  %72 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 24), align 16
   %73 = fcmp fast ogt float %71, %72
   %74 = select i1 %73, float %71, float %72
-  %75 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 25), align 4
+  %75 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 25), align 4
   %76 = fcmp fast ogt float %74, %75
   %77 = select i1 %76, float %74, float %75
-  %78 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 26), align 8
+  %78 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 26), align 8
   %79 = fcmp fast ogt float %77, %78
   %80 = select i1 %79, float %77, float %78
-  %81 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 27), align 4
+  %81 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 27), align 4
   %82 = fcmp fast ogt float %80, %81
   %83 = select i1 %82, float %80, float %81
-  %84 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 28), align 16
+  %84 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 28), align 16
   %85 = fcmp fast ogt float %83, %84
   %86 = select i1 %85, float %83, float %84
-  %87 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 29), align 4
+  %87 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 29), align 4
   %88 = fcmp fast ogt float %86, %87
   %89 = select i1 %88, float %86, float %87
-  %90 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 30), align 8
+  %90 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 30), align 8
   %91 = fcmp fast ogt float %89, %90
   %92 = select i1 %91, float %89, float %90
-  %93 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 31), align 4
+  %93 = load float, ptr getelementptr inbounds ([32 x float], ptr @arr1, i64 0, i64 31), align 4
   %94 = fcmp fast ogt float %92, %93
   %95 = select i1 %94, float %92, float %93
   ret float %95
@@ -838,40 +838,40 @@ define float @maxf32(float) {
 
 define i32 @maxi8_mutiple_uses(i32) {
 ; SSE-LABEL: @maxi8_mutiple_uses(
-; SSE-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
-; SSE-NEXT:    [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
+; SSE-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
 ; SSE-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
-; SSE-NEXT:    [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
+; SSE-NEXT:    [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
 ; SSE-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
-; SSE-NEXT:    [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
+; SSE-NEXT:    [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
 ; SSE-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
 ; SSE-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
-; SSE-NEXT:    [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
+; SSE-NEXT:    [[TMP12:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
 ; SSE-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
 ; SSE-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
-; SSE-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
+; SSE-NEXT:    [[TMP15:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
 ; SSE-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
 ; SSE-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
-; SSE-NEXT:    [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
+; SSE-NEXT:    [[TMP18:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
 ; SSE-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
 ; SSE-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
-; SSE-NEXT:    [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+; SSE-NEXT:    [[TMP21:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
 ; SSE-NEXT:    [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
 ; SSE-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]]
 ; SSE-NEXT:    [[TMP24:%.*]] = select i1 [[TMP4]], i32 3, i32 4
-; SSE-NEXT:    store i32 [[TMP24]], i32* @var, align 8
+; SSE-NEXT:    store i32 [[TMP24]], ptr @var, align 8
 ; SSE-NEXT:    ret i32 [[TMP23]]
 ;
 ; AVX-LABEL: @maxi8_mutiple_uses(
-; AVX-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
-; AVX-NEXT:    [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
+; AVX-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
+; AVX-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
 ; AVX-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
 ; AVX-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
-; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
-; AVX-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
-; AVX-NEXT:    [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
+; AVX-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
+; AVX-NEXT:    [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
 ; AVX-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
 ; AVX-NEXT:    [[OP_RDX:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
 ; AVX-NEXT:    [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP9]], i32 [[TMP7]]
@@ -880,17 +880,17 @@ define i32 @maxi8_mutiple_uses(i32) {
 ; AVX-NEXT:    [[OP_RDX4:%.*]] = icmp sgt i32 [[OP_RDX1]], [[OP_RDX3]]
 ; AVX-NEXT:    [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]]
 ; AVX-NEXT:    [[TMP10:%.*]] = select i1 [[TMP4]], i32 3, i32 4
-; AVX-NEXT:    store i32 [[TMP10]], i32* @var, align 8
+; AVX-NEXT:    store i32 [[TMP10]], ptr @var, align 8
 ; AVX-NEXT:    ret i32 [[OP_RDX5]]
 ;
 ; AVX2-LABEL: @maxi8_mutiple_uses(
-; AVX2-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
-; AVX2-NEXT:    [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
+; AVX2-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
+; AVX2-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
 ; AVX2-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
 ; AVX2-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
-; AVX2-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
-; AVX2-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
-; AVX2-NEXT:    [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+; AVX2-NEXT:    [[TMP6:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
+; AVX2-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
+; AVX2-NEXT:    [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
 ; AVX2-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
 ; AVX2-NEXT:    [[OP_RDX:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
 ; AVX2-NEXT:    [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP9]], i32 [[TMP7]]
@@ -899,18 +899,18 @@ define i32 @maxi8_mutiple_uses(i32) {
 ; AVX2-NEXT:    [[OP_RDX4:%.*]] = icmp sgt i32 [[OP_RDX1]], [[OP_RDX3]]
 ; AVX2-NEXT:    [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]]
 ; AVX2-NEXT:    [[TMP10:%.*]] = select i1 [[TMP4]], i32 3, i32 4
-; AVX2-NEXT:    store i32 [[TMP10]], i32* @var, align 8
+; AVX2-NEXT:    store i32 [[TMP10]], ptr @var, align 8
 ; AVX2-NEXT:    ret i32 [[OP_RDX5]]
 ;
 ; THRESH-LABEL: @maxi8_mutiple_uses(
-; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16
+; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr @arr, align 16
 ; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
 ; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
 ; THRESH-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]]
 ; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP3]], i32 [[TMP4]]
-; THRESH-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
-; THRESH-NEXT:    [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
-; THRESH-NEXT:    [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+; THRESH-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
+; THRESH-NEXT:    [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
+; THRESH-NEXT:    [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
 ; THRESH-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP7]])
 ; THRESH-NEXT:    [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
 ; THRESH-NEXT:    [[TMP12:%.*]] = insertelement <2 x i32> [[TMP11]], i32 [[TMP9]], i32 1
@@ -923,139 +923,139 @@ define i32 @maxi8_mutiple_uses(i32) {
 ; THRESH-NEXT:    [[OP_RDX4:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
 ; THRESH-NEXT:    [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[TMP17]], i32 [[TMP18]]
 ; THRESH-NEXT:    [[TMP19:%.*]] = select i1 [[TMP5]], i32 3, i32 4
-; THRESH-NEXT:    store i32 [[TMP19]], i32* @var, align 8
+; THRESH-NEXT:    store i32 [[TMP19]], ptr @var, align 8
 ; THRESH-NEXT:    ret i32 [[OP_RDX5]]
 ;
-  %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
-  %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
+  %2 = load i32, ptr @arr, align 16
+  %3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
   %4 = icmp sgt i32 %2, %3
   %5 = select i1 %4, i32 %2, i32 %3
-  %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
+  %6 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
   %7 = icmp sgt i32 %5, %6
   %8 = select i1 %7, i32 %5, i32 %6
-  %9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
+  %9 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
   %10 = icmp sgt i32 %8, %9
   %11 = select i1 %10, i32 %8, i32 %9
-  %12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
+  %12 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
   %13 = icmp sgt i32 %11, %12
   %14 = select i1 %13, i32 %11, i32 %12
-  %15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
+  %15 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
   %16 = icmp sgt i32 %14, %15
   %17 = select i1 %16, i32 %14, i32 %15
-  %18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
+  %18 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
   %19 = icmp sgt i32 %17, %18
   %20 = select i1 %19, i32 %17, i32 %18
-  %21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+  %21 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
   %22 = icmp sgt i32 %20, %21
   %23 = select i1 %22, i32 %20, i32 %21
   %24 = select i1 %4, i32 3, i32 4
-  store i32 %24, i32* @var, align 8
+  store i32 %24, ptr @var, align 8
   ret i32 %23
 }
 
 define i32 @maxi8_mutiple_uses2(i32) {
 ; DEFAULT-LABEL: @maxi8_mutiple_uses2(
-; DEFAULT-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
-; DEFAULT-NEXT:    [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
+; DEFAULT-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
+; DEFAULT-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
 ; DEFAULT-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
 ; DEFAULT-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
-; DEFAULT-NEXT:    [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
+; DEFAULT-NEXT:    [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
 ; DEFAULT-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
 ; DEFAULT-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
-; DEFAULT-NEXT:    [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
+; DEFAULT-NEXT:    [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
 ; DEFAULT-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
 ; DEFAULT-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
-; DEFAULT-NEXT:    [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
+; DEFAULT-NEXT:    [[TMP12:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
 ; DEFAULT-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
 ; DEFAULT-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
-; DEFAULT-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
+; DEFAULT-NEXT:    [[TMP15:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
 ; DEFAULT-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
 ; DEFAULT-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
 ; DEFAULT-NEXT:    [[TMP18:%.*]] = select i1 [[TMP10]], i32 3, i32 4
-; DEFAULT-NEXT:    store i32 [[TMP18]], i32* @var, align 8
+; DEFAULT-NEXT:    store i32 [[TMP18]], ptr @var, align 8
 ; DEFAULT-NEXT:    ret i32 [[TMP17]]
 ;
 ; THRESH-LABEL: @maxi8_mutiple_uses2(
-; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16
+; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr @arr, align 16
 ; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
 ; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
 ; THRESH-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]]
 ; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP3]], i32 [[TMP4]]
-; THRESH-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
+; THRESH-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
 ; THRESH-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
 ; THRESH-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 [[TMP7]]
-; THRESH-NEXT:    [[TMP10:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
+; THRESH-NEXT:    [[TMP10:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
 ; THRESH-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
 ; THRESH-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 [[TMP10]]
-; THRESH-NEXT:    [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
+; THRESH-NEXT:    [[TMP13:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
 ; THRESH-NEXT:    [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]]
 ; THRESH-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 [[TMP13]]
-; THRESH-NEXT:    [[TMP16:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
+; THRESH-NEXT:    [[TMP16:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
 ; THRESH-NEXT:    [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]]
 ; THRESH-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 [[TMP16]]
 ; THRESH-NEXT:    [[TMP19:%.*]] = select i1 [[TMP11]], i32 3, i32 4
-; THRESH-NEXT:    store i32 [[TMP19]], i32* @var, align 8
+; THRESH-NEXT:    store i32 [[TMP19]], ptr @var, align 8
 ; THRESH-NEXT:    ret i32 [[TMP18]]
 ;
-  %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
-  %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
+  %2 = load i32, ptr @arr, align 16
+  %3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
   %4 = icmp sgt i32 %2, %3
   %5 = select i1 %4, i32 %2, i32 %3
-  %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
+  %6 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
   %7 = icmp sgt i32 %5, %6
   %8 = select i1 %7, i32 %5, i32 %6
-  %9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
+  %9 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
   %10 = icmp sgt i32 %8, %9
   %11 = select i1 %10, i32 %8, i32 %9
-  %12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
+  %12 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
   %13 = icmp sgt i32 %11, %12
   %14 = select i1 %13, i32 %11, i32 %12
-  %15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
+  %15 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
   %16 = icmp sgt i32 %14, %15
   %17 = select i1 %16, i32 %14, i32 %15
   %18 = select i1 %10, i32 3, i32 4
-  store i32 %18, i32* @var, align 8
+  store i32 %18, ptr @var, align 8
   ret i32 %17
 }
 
 define i32 @maxi8_wrong_parent(i32) {
 ; SSE-LABEL: @maxi8_wrong_parent(
-; SSE-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
-; SSE-NEXT:    [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
+; SSE-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
 ; SSE-NEXT:    br label [[PP:%.*]]
 ; SSE:       pp:
 ; SSE-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
-; SSE-NEXT:    [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
+; SSE-NEXT:    [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
 ; SSE-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
-; SSE-NEXT:    [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
+; SSE-NEXT:    [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
 ; SSE-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
 ; SSE-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
-; SSE-NEXT:    [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
+; SSE-NEXT:    [[TMP12:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
 ; SSE-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
 ; SSE-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
-; SSE-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
+; SSE-NEXT:    [[TMP15:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
 ; SSE-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
 ; SSE-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
-; SSE-NEXT:    [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
+; SSE-NEXT:    [[TMP18:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
 ; SSE-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
 ; SSE-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
-; SSE-NEXT:    [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+; SSE-NEXT:    [[TMP21:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
 ; SSE-NEXT:    [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
 ; SSE-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]]
 ; SSE-NEXT:    ret i32 [[TMP23]]
 ;
 ; AVX-LABEL: @maxi8_wrong_parent(
-; AVX-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
-; AVX-NEXT:    [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
+; AVX-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
+; AVX-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
 ; AVX-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
 ; AVX-NEXT:    br label [[PP:%.*]]
 ; AVX:       pp:
 ; AVX-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
-; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
-; AVX-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
-; AVX-NEXT:    [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
+; AVX-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
+; AVX-NEXT:    [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
 ; AVX-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
 ; AVX-NEXT:    [[OP_RDX:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
 ; AVX-NEXT:    [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP9]], i32 [[TMP7]]
@@ -1066,15 +1066,15 @@ define i32 @maxi8_wrong_parent(i32) {
 ; AVX-NEXT:    ret i32 [[OP_RDX5]]
 ;
 ; AVX2-LABEL: @maxi8_wrong_parent(
-; AVX2-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
-; AVX2-NEXT:    [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
+; AVX2-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
+; AVX2-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
 ; AVX2-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
 ; AVX2-NEXT:    br label [[PP:%.*]]
 ; AVX2:       pp:
 ; AVX2-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
-; AVX2-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
-; AVX2-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
-; AVX2-NEXT:    [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+; AVX2-NEXT:    [[TMP6:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
+; AVX2-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
+; AVX2-NEXT:    [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
 ; AVX2-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
 ; AVX2-NEXT:    [[OP_RDX:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
 ; AVX2-NEXT:    [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP9]], i32 [[TMP7]]
@@ -1085,16 +1085,16 @@ define i32 @maxi8_wrong_parent(i32) {
 ; AVX2-NEXT:    ret i32 [[OP_RDX5]]
 ;
 ; THRESH-LABEL: @maxi8_wrong_parent(
-; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16
+; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr @arr, align 16
 ; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
 ; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
 ; THRESH-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]]
 ; THRESH-NEXT:    br label [[PP:%.*]]
 ; THRESH:       pp:
 ; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP3]], i32 [[TMP4]]
-; THRESH-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
-; THRESH-NEXT:    [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
-; THRESH-NEXT:    [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+; THRESH-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
+; THRESH-NEXT:    [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
+; THRESH-NEXT:    [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
 ; THRESH-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP7]])
 ; THRESH-NEXT:    [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
 ; THRESH-NEXT:    [[TMP12:%.*]] = insertelement <2 x i32> [[TMP11]], i32 [[TMP9]], i32 1
@@ -1108,129 +1108,129 @@ define i32 @maxi8_wrong_parent(i32) {
 ; THRESH-NEXT:    [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[TMP17]], i32 [[TMP18]]
 ; THRESH-NEXT:    ret i32 [[OP_RDX5]]
 ;
-  %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
-  %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
+  %2 = load i32, ptr @arr, align 16
+  %3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
   %4 = icmp sgt i32 %2, %3
   br label %pp
 
 pp:
   %5 = select i1 %4, i32 %2, i32 %3
-  %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
+  %6 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
   %7 = icmp sgt i32 %5, %6
   %8 = select i1 %7, i32 %5, i32 %6
-  %9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
+  %9 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
   %10 = icmp sgt i32 %8, %9
   %11 = select i1 %10, i32 %8, i32 %9
-  %12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
+  %12 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
   %13 = icmp sgt i32 %11, %12
   %14 = select i1 %13, i32 %11, i32 %12
-  %15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
+  %15 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
   %16 = icmp sgt i32 %14, %15
   %17 = select i1 %16, i32 %14, i32 %15
-  %18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
+  %18 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
   %19 = icmp sgt i32 %17, %18
   %20 = select i1 %19, i32 %17, i32 %18
-  %21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+  %21 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
   %22 = icmp sgt i32 %20, %21
   %23 = select i1 %22, i32 %20, i32 %21
   ret i32 %23
 }
 
 ; PR38191 - We don't handle array-of-pointer reductions.
-define i32* @maxp8(i32) {
+define ptr @maxp8(i32) {
 ; DEFAULT-LABEL: @maxp8(
-; DEFAULT-NEXT:    [[TMP2:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 0), align 16
-; DEFAULT-NEXT:    [[TMP3:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 1), align 4
-; DEFAULT-NEXT:    [[TMP4:%.*]] = icmp ugt i32* [[TMP2]], [[TMP3]]
-; DEFAULT-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32* [[TMP2]], i32* [[TMP3]]
-; DEFAULT-NEXT:    [[TMP6:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 2), align 8
-; DEFAULT-NEXT:    [[TMP7:%.*]] = icmp ugt i32* [[TMP5]], [[TMP6]]
-; DEFAULT-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32* [[TMP5]], i32* [[TMP6]]
-; DEFAULT-NEXT:    [[TMP9:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 3), align 4
-; DEFAULT-NEXT:    [[TMP10:%.*]] = icmp ugt i32* [[TMP8]], [[TMP9]]
-; DEFAULT-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i32* [[TMP8]], i32* [[TMP9]]
-; DEFAULT-NEXT:    [[TMP12:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 4), align 16
-; DEFAULT-NEXT:    [[TMP13:%.*]] = icmp ugt i32* [[TMP11]], [[TMP12]]
-; DEFAULT-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32* [[TMP11]], i32* [[TMP12]]
-; DEFAULT-NEXT:    [[TMP15:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 5), align 4
-; DEFAULT-NEXT:    [[TMP16:%.*]] = icmp ugt i32* [[TMP14]], [[TMP15]]
-; DEFAULT-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32* [[TMP14]], i32* [[TMP15]]
-; DEFAULT-NEXT:    [[TMP18:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 6), align 8
-; DEFAULT-NEXT:    [[TMP19:%.*]] = icmp ugt i32* [[TMP17]], [[TMP18]]
-; DEFAULT-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32* [[TMP17]], i32* [[TMP18]]
-; DEFAULT-NEXT:    [[TMP21:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 7), align 4
-; DEFAULT-NEXT:    [[TMP22:%.*]] = icmp ugt i32* [[TMP20]], [[TMP21]]
-; DEFAULT-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], i32* [[TMP20]], i32* [[TMP21]]
-; DEFAULT-NEXT:    ret i32* [[TMP23]]
+; DEFAULT-NEXT:    [[TMP2:%.*]] = load ptr, ptr @arrp, align 16
+; DEFAULT-NEXT:    [[TMP3:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 1), align 4
+; DEFAULT-NEXT:    [[TMP4:%.*]] = icmp ugt ptr [[TMP2]], [[TMP3]]
+; DEFAULT-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], ptr [[TMP2]], ptr [[TMP3]]
+; DEFAULT-NEXT:    [[TMP6:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 2), align 8
+; DEFAULT-NEXT:    [[TMP7:%.*]] = icmp ugt ptr [[TMP5]], [[TMP6]]
+; DEFAULT-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], ptr [[TMP5]], ptr [[TMP6]]
+; DEFAULT-NEXT:    [[TMP9:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 3), align 4
+; DEFAULT-NEXT:    [[TMP10:%.*]] = icmp ugt ptr [[TMP8]], [[TMP9]]
+; DEFAULT-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], ptr [[TMP8]], ptr [[TMP9]]
+; DEFAULT-NEXT:    [[TMP12:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 4), align 16
+; DEFAULT-NEXT:    [[TMP13:%.*]] = icmp ugt ptr [[TMP11]], [[TMP12]]
+; DEFAULT-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], ptr [[TMP11]], ptr [[TMP12]]
+; DEFAULT-NEXT:    [[TMP15:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 5), align 4
+; DEFAULT-NEXT:    [[TMP16:%.*]] = icmp ugt ptr [[TMP14]], [[TMP15]]
+; DEFAULT-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], ptr [[TMP14]], ptr [[TMP15]]
+; DEFAULT-NEXT:    [[TMP18:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 6), align 8
+; DEFAULT-NEXT:    [[TMP19:%.*]] = icmp ugt ptr [[TMP17]], [[TMP18]]
+; DEFAULT-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], ptr [[TMP17]], ptr [[TMP18]]
+; DEFAULT-NEXT:    [[TMP21:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 7), align 4
+; DEFAULT-NEXT:    [[TMP22:%.*]] = icmp ugt ptr [[TMP20]], [[TMP21]]
+; DEFAULT-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], ptr [[TMP20]], ptr [[TMP21]]
+; DEFAULT-NEXT:    ret ptr [[TMP23]]
 ;
 ; THRESH-LABEL: @maxp8(
-; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x i32*>, <2 x i32*>* bitcast ([32 x i32*]* @arrp to <2 x i32*>*), align 16
-; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32*> [[TMP2]], i32 0
-; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32*> [[TMP2]], i32 1
-; THRESH-NEXT:    [[TMP5:%.*]] = icmp ugt i32* [[TMP3]], [[TMP4]]
-; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], i32* [[TMP3]], i32* [[TMP4]]
-; THRESH-NEXT:    [[TMP7:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 2), align 8
-; THRESH-NEXT:    [[TMP8:%.*]] = icmp ugt i32* [[TMP6]], [[TMP7]]
-; THRESH-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], i32* [[TMP6]], i32* [[TMP7]]
-; THRESH-NEXT:    [[TMP10:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 3), align 4
-; THRESH-NEXT:    [[TMP11:%.*]] = icmp ugt i32* [[TMP9]], [[TMP10]]
-; THRESH-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32* [[TMP9]], i32* [[TMP10]]
-; THRESH-NEXT:    [[TMP13:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 4), align 16
-; THRESH-NEXT:    [[TMP14:%.*]] = icmp ugt i32* [[TMP12]], [[TMP13]]
-; THRESH-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], i32* [[TMP12]], i32* [[TMP13]]
-; THRESH-NEXT:    [[TMP16:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 5), align 4
-; THRESH-NEXT:    [[TMP17:%.*]] = icmp ugt i32* [[TMP15]], [[TMP16]]
-; THRESH-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], i32* [[TMP15]], i32* [[TMP16]]
-; THRESH-NEXT:    [[TMP19:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 6), align 8
-; THRESH-NEXT:    [[TMP20:%.*]] = icmp ugt i32* [[TMP18]], [[TMP19]]
-; THRESH-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], i32* [[TMP18]], i32* [[TMP19]]
-; THRESH-NEXT:    [[TMP22:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 7), align 4
-; THRESH-NEXT:    [[TMP23:%.*]] = icmp ugt i32* [[TMP21]], [[TMP22]]
-; THRESH-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], i32* [[TMP21]], i32* [[TMP22]]
-; THRESH-NEXT:    ret i32* [[TMP24]]
+; THRESH-NEXT:    [[TMP2:%.*]] = load <2 x ptr>, ptr @arrp, align 16
+; THRESH-NEXT:    [[TMP3:%.*]] = extractelement <2 x ptr> [[TMP2]], i32 0
+; THRESH-NEXT:    [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP2]], i32 1
+; THRESH-NEXT:    [[TMP5:%.*]] = icmp ugt ptr [[TMP3]], [[TMP4]]
+; THRESH-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], ptr [[TMP3]], ptr [[TMP4]]
+; THRESH-NEXT:    [[TMP7:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 2), align 8
+; THRESH-NEXT:    [[TMP8:%.*]] = icmp ugt ptr [[TMP6]], [[TMP7]]
+; THRESH-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], ptr [[TMP6]], ptr [[TMP7]]
+; THRESH-NEXT:    [[TMP10:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 3), align 4
+; THRESH-NEXT:    [[TMP11:%.*]] = icmp ugt ptr [[TMP9]], [[TMP10]]
+; THRESH-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], ptr [[TMP9]], ptr [[TMP10]]
+; THRESH-NEXT:    [[TMP13:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 4), align 16
+; THRESH-NEXT:    [[TMP14:%.*]] = icmp ugt ptr [[TMP12]], [[TMP13]]
+; THRESH-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], ptr [[TMP12]], ptr [[TMP13]]
+; THRESH-NEXT:    [[TMP16:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 5), align 4
+; THRESH-NEXT:    [[TMP17:%.*]] = icmp ugt ptr [[TMP15]], [[TMP16]]
+; THRESH-NEXT:    [[TMP18:%.*]] = select i1 [[TMP17]], ptr [[TMP15]], ptr [[TMP16]]
+; THRESH-NEXT:    [[TMP19:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 6), align 8
+; THRESH-NEXT:    [[TMP20:%.*]] = icmp ugt ptr [[TMP18]], [[TMP19]]
+; THRESH-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], ptr [[TMP18]], ptr [[TMP19]]
+; THRESH-NEXT:    [[TMP22:%.*]] = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 7), align 4
+; THRESH-NEXT:    [[TMP23:%.*]] = icmp ugt ptr [[TMP21]], [[TMP22]]
+; THRESH-NEXT:    [[TMP24:%.*]] = select i1 [[TMP23]], ptr [[TMP21]], ptr [[TMP22]]
+; THRESH-NEXT:    ret ptr [[TMP24]]
 ;
-  %2 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 0), align 16
-  %3 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 1), align 4
-  %4 = icmp ugt i32* %2, %3
-  %5 = select i1 %4, i32* %2, i32* %3
-  %6 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 2), align 8
-  %7 = icmp ugt i32* %5, %6
-  %8 = select i1 %7, i32* %5, i32* %6
-  %9 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 3), align 4
-  %10 = icmp ugt i32* %8, %9
-  %11 = select i1 %10, i32* %8, i32* %9
-  %12 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 4), align 16
-  %13 = icmp ugt i32* %11, %12
-  %14 = select i1 %13, i32* %11, i32* %12
-  %15 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 5), align 4
-  %16 = icmp ugt i32* %14, %15
-  %17 = select i1 %16, i32* %14, i32* %15
-  %18 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 6), align 8
-  %19 = icmp ugt i32* %17, %18
-  %20 = select i1 %19, i32* %17, i32* %18
-  %21 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 7), align 4
-  %22 = icmp ugt i32* %20, %21
-  %23 = select i1 %22, i32* %20, i32* %21
-  ret i32* %23
+  %2 = load ptr, ptr @arrp, align 16
+  %3 = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 1), align 4
+  %4 = icmp ugt ptr %2, %3
+  %5 = select i1 %4, ptr %2, ptr %3
+  %6 = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 2), align 8
+  %7 = icmp ugt ptr %5, %6
+  %8 = select i1 %7, ptr %5, ptr %6
+  %9 = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 3), align 4
+  %10 = icmp ugt ptr %8, %9
+  %11 = select i1 %10, ptr %8, ptr %9
+  %12 = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 4), align 16
+  %13 = icmp ugt ptr %11, %12
+  %14 = select i1 %13, ptr %11, ptr %12
+  %15 = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 5), align 4
+  %16 = icmp ugt ptr %14, %15
+  %17 = select i1 %16, ptr %14, ptr %15
+  %18 = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 6), align 8
+  %19 = icmp ugt ptr %17, %18
+  %20 = select i1 %19, ptr %17, ptr %18
+  %21 = load ptr, ptr getelementptr inbounds ([32 x ptr], ptr @arrp, i64 0, i64 7), align 4
+  %22 = icmp ugt ptr %20, %21
+  %23 = select i1 %22, ptr %20, ptr %21
+  ret ptr %23
 }
 
-define i32 @smax_intrinsic_rdx_v8i32(i32* %p0) {
+define i32 @smax_intrinsic_rdx_v8i32(ptr %p0) {
 ; SSE-LABEL: @smax_intrinsic_rdx_v8i32(
-; SSE-NEXT:    [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
-; SSE-NEXT:    [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2
-; SSE-NEXT:    [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3
-; SSE-NEXT:    [[P4:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 4
-; SSE-NEXT:    [[P5:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 5
-; SSE-NEXT:    [[P6:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 6
-; SSE-NEXT:    [[P7:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 7
-; SSE-NEXT:    [[T0:%.*]] = load i32, i32* [[P0]], align 4
-; SSE-NEXT:    [[T1:%.*]] = load i32, i32* [[P1]], align 4
-; SSE-NEXT:    [[T2:%.*]] = load i32, i32* [[P2]], align 4
-; SSE-NEXT:    [[T3:%.*]] = load i32, i32* [[P3]], align 4
-; SSE-NEXT:    [[T4:%.*]] = load i32, i32* [[P4]], align 4
-; SSE-NEXT:    [[T5:%.*]] = load i32, i32* [[P5]], align 4
-; SSE-NEXT:    [[T6:%.*]] = load i32, i32* [[P6]], align 4
-; SSE-NEXT:    [[T7:%.*]] = load i32, i32* [[P7]], align 4
+; SSE-NEXT:    [[P1:%.*]] = getelementptr inbounds i32, ptr [[P0:%.*]], i64 1
+; SSE-NEXT:    [[P2:%.*]] = getelementptr inbounds i32, ptr [[P0]], i64 2
+; SSE-NEXT:    [[P3:%.*]] = getelementptr inbounds i32, ptr [[P0]], i64 3
+; SSE-NEXT:    [[P4:%.*]] = getelementptr inbounds i32, ptr [[P0]], i64 4
+; SSE-NEXT:    [[P5:%.*]] = getelementptr inbounds i32, ptr [[P0]], i64 5
+; SSE-NEXT:    [[P6:%.*]] = getelementptr inbounds i32, ptr [[P0]], i64 6
+; SSE-NEXT:    [[P7:%.*]] = getelementptr inbounds i32, ptr [[P0]], i64 7
+; SSE-NEXT:    [[T0:%.*]] = load i32, ptr [[P0]], align 4
+; SSE-NEXT:    [[T1:%.*]] = load i32, ptr [[P1]], align 4
+; SSE-NEXT:    [[T2:%.*]] = load i32, ptr [[P2]], align 4
+; SSE-NEXT:    [[T3:%.*]] = load i32, ptr [[P3]], align 4
+; SSE-NEXT:    [[T4:%.*]] = load i32, ptr [[P4]], align 4
+; SSE-NEXT:    [[T5:%.*]] = load i32, ptr [[P5]], align 4
+; SSE-NEXT:    [[T6:%.*]] = load i32, ptr [[P6]], align 4
+; SSE-NEXT:    [[T7:%.*]] = load i32, ptr [[P7]], align 4
 ; SSE-NEXT:    [[M10:%.*]] = tail call i32 @llvm.smax.i32(i32 [[T1]], i32 [[T0]])
 ; SSE-NEXT:    [[M32:%.*]] = tail call i32 @llvm.smax.i32(i32 [[T3]], i32 [[T2]])
 ; SSE-NEXT:    [[M54:%.*]] = tail call i32 @llvm.smax.i32(i32 [[T5]], i32 [[T4]])
@@ -1241,38 +1241,35 @@ define i32 @smax_intrinsic_rdx_v8i32(i32* %p0) {
 ; SSE-NEXT:    ret i32 [[M]]
 ;
 ; AVX-LABEL: @smax_intrinsic_rdx_v8i32(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <8 x i32>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* [[TMP1]], align 4
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[P0:%.*]], align 4
 ; AVX-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]])
 ; AVX-NEXT:    ret i32 [[TMP3]]
 ;
 ; AVX2-LABEL: @smax_intrinsic_rdx_v8i32(
-; AVX2-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <8 x i32>*
-; AVX2-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* [[TMP1]], align 4
+; AVX2-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[P0:%.*]], align 4
 ; AVX2-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]])
 ; AVX2-NEXT:    ret i32 [[TMP3]]
 ;
 ; THRESH-LABEL: @smax_intrinsic_rdx_v8i32(
-; THRESH-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <8 x i32>*
-; THRESH-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* [[TMP1]], align 4
+; THRESH-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[P0:%.*]], align 4
 ; THRESH-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]])
 ; THRESH-NEXT:    ret i32 [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i32, i32* %p0, i64 1
-  %p2 = getelementptr inbounds i32, i32* %p0, i64 2
-  %p3 = getelementptr inbounds i32, i32* %p0, i64 3
-  %p4 = getelementptr inbounds i32, i32* %p0, i64 4
-  %p5 = getelementptr inbounds i32, i32* %p0, i64 5
-  %p6 = getelementptr inbounds i32, i32* %p0, i64 6
-  %p7 = getelementptr inbounds i32, i32* %p0, i64 7
-  %t0 = load i32, i32* %p0, align 4
-  %t1 = load i32, i32* %p1, align 4
-  %t2 = load i32, i32* %p2, align 4
-  %t3 = load i32, i32* %p3, align 4
-  %t4 = load i32, i32* %p4, align 4
-  %t5 = load i32, i32* %p5, align 4
-  %t6 = load i32, i32* %p6, align 4
-  %t7 = load i32, i32* %p7, align 4
+  %p1 = getelementptr inbounds i32, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i32, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i32, ptr %p0, i64 3
+  %p4 = getelementptr inbounds i32, ptr %p0, i64 4
+  %p5 = getelementptr inbounds i32, ptr %p0, i64 5
+  %p6 = getelementptr inbounds i32, ptr %p0, i64 6
+  %p7 = getelementptr inbounds i32, ptr %p0, i64 7
+  %t0 = load i32, ptr %p0, align 4
+  %t1 = load i32, ptr %p1, align 4
+  %t2 = load i32, ptr %p2, align 4
+  %t3 = load i32, ptr %p3, align 4
+  %t4 = load i32, ptr %p4, align 4
+  %t5 = load i32, ptr %p5, align 4
+  %t6 = load i32, ptr %p6, align 4
+  %t7 = load i32, ptr %p7, align 4
   %m10 = tail call i32 @llvm.smax.i32(i32 %t1, i32 %t0)
   %m32 = tail call i32 @llvm.smax.i32(i32 %t3, i32 %t2)
   %m54 = tail call i32 @llvm.smax.i32(i32 %t5, i32 %t4)
@@ -1283,28 +1280,27 @@ define i32 @smax_intrinsic_rdx_v8i32(i32* %p0) {
   ret i32 %m
 }
 
-define i16 @smin_intrinsic_rdx_v8i16(i16* %p0) {
+define i16 @smin_intrinsic_rdx_v8i16(ptr %p0) {
 ; CHECK-LABEL: @smin_intrinsic_rdx_v8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> [[TMP2]])
 ; CHECK-NEXT:    ret i16 [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
-  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
-  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
-  %p4 = getelementptr inbounds i16, i16* %p0, i64 4
-  %p5 = getelementptr inbounds i16, i16* %p0, i64 5
-  %p6 = getelementptr inbounds i16, i16* %p0, i64 6
-  %p7 = getelementptr inbounds i16, i16* %p0, i64 7
-  %t0 = load i16, i16* %p0, align 4
-  %t1 = load i16, i16* %p1, align 4
-  %t2 = load i16, i16* %p2, align 4
-  %t3 = load i16, i16* %p3, align 4
-  %t4 = load i16, i16* %p4, align 4
-  %t5 = load i16, i16* %p5, align 4
-  %t6 = load i16, i16* %p6, align 4
-  %t7 = load i16, i16* %p7, align 4
+  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
+  %p4 = getelementptr inbounds i16, ptr %p0, i64 4
+  %p5 = getelementptr inbounds i16, ptr %p0, i64 5
+  %p6 = getelementptr inbounds i16, ptr %p0, i64 6
+  %p7 = getelementptr inbounds i16, ptr %p0, i64 7
+  %t0 = load i16, ptr %p0, align 4
+  %t1 = load i16, ptr %p1, align 4
+  %t2 = load i16, ptr %p2, align 4
+  %t3 = load i16, ptr %p3, align 4
+  %t4 = load i16, ptr %p4, align 4
+  %t5 = load i16, ptr %p5, align 4
+  %t6 = load i16, ptr %p6, align 4
+  %t7 = load i16, ptr %p7, align 4
   %m10 = tail call i16 @llvm.smin.i16(i16 %t1, i16 %t0)
   %m32 = tail call i16 @llvm.smin.i16(i16 %t3, i16 %t2)
   %m54 = tail call i16 @llvm.smin.i16(i16 %t5, i16 %t4)
@@ -1315,77 +1311,75 @@ define i16 @smin_intrinsic_rdx_v8i16(i16* %p0) {
   ret i16 %m
 }
 
-define i64 @umax_intrinsic_rdx_v4i64(i64* %p0) {
+define i64 @umax_intrinsic_rdx_v4i64(ptr %p0) {
 ; DEFAULT-LABEL: @umax_intrinsic_rdx_v4i64(
-; DEFAULT-NEXT:    [[P1:%.*]] = getelementptr inbounds i64, i64* [[P0:%.*]], i64 1
-; DEFAULT-NEXT:    [[P2:%.*]] = getelementptr inbounds i64, i64* [[P0]], i64 2
-; DEFAULT-NEXT:    [[P3:%.*]] = getelementptr inbounds i64, i64* [[P0]], i64 3
-; DEFAULT-NEXT:    [[T0:%.*]] = load i64, i64* [[P0]], align 4
-; DEFAULT-NEXT:    [[T1:%.*]] = load i64, i64* [[P1]], align 4
-; DEFAULT-NEXT:    [[T2:%.*]] = load i64, i64* [[P2]], align 4
-; DEFAULT-NEXT:    [[T3:%.*]] = load i64, i64* [[P3]], align 4
+; DEFAULT-NEXT:    [[P1:%.*]] = getelementptr inbounds i64, ptr [[P0:%.*]], i64 1
+; DEFAULT-NEXT:    [[P2:%.*]] = getelementptr inbounds i64, ptr [[P0]], i64 2
+; DEFAULT-NEXT:    [[P3:%.*]] = getelementptr inbounds i64, ptr [[P0]], i64 3
+; DEFAULT-NEXT:    [[T0:%.*]] = load i64, ptr [[P0]], align 4
+; DEFAULT-NEXT:    [[T1:%.*]] = load i64, ptr [[P1]], align 4
+; DEFAULT-NEXT:    [[T2:%.*]] = load i64, ptr [[P2]], align 4
+; DEFAULT-NEXT:    [[T3:%.*]] = load i64, ptr [[P3]], align 4
 ; DEFAULT-NEXT:    [[M10:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T1]], i64 [[T0]])
 ; DEFAULT-NEXT:    [[M32:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T3]], i64 [[T2]])
 ; DEFAULT-NEXT:    [[M:%.*]] = tail call i64 @llvm.umax.i64(i64 [[M32]], i64 [[M10]])
 ; DEFAULT-NEXT:    ret i64 [[M]]
 ;
 ; THRESH-LABEL: @umax_intrinsic_rdx_v4i64(
-; THRESH-NEXT:    [[TMP1:%.*]] = bitcast i64* [[P0:%.*]] to <4 x i64>*
-; THRESH-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* [[TMP1]], align 4
+; THRESH-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr [[P0:%.*]], align 4
 ; THRESH-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP2]])
 ; THRESH-NEXT:    ret i64 [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i64, i64* %p0, i64 1
-  %p2 = getelementptr inbounds i64, i64* %p0, i64 2
-  %p3 = getelementptr inbounds i64, i64* %p0, i64 3
-  %t0 = load i64, i64* %p0, align 4
-  %t1 = load i64, i64* %p1, align 4
-  %t2 = load i64, i64* %p2, align 4
-  %t3 = load i64, i64* %p3, align 4
+  %p1 = getelementptr inbounds i64, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i64, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i64, ptr %p0, i64 3
+  %t0 = load i64, ptr %p0, align 4
+  %t1 = load i64, ptr %p1, align 4
+  %t2 = load i64, ptr %p2, align 4
+  %t3 = load i64, ptr %p3, align 4
   %m10 = tail call i64 @llvm.umax.i64(i64 %t1, i64 %t0)
   %m32 = tail call i64 @llvm.umax.i64(i64 %t3, i64 %t2)
   %m = tail call i64 @llvm.umax.i64(i64 %m32, i64 %m10)
   ret i64 %m
 }
 
-define i8 @umin_intrinsic_rdx_v16i8(i8* %p0) {
+define i8 @umin_intrinsic_rdx_v16i8(ptr %p0) {
 ; CHECK-LABEL: @umin_intrinsic_rdx_v16i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> [[TMP2]])
 ; CHECK-NEXT:    ret i8 [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
-  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
-  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
-  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
-  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
-  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
-  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
-  %p8 = getelementptr inbounds i8, i8* %p0, i64 8
-  %p9 = getelementptr inbounds i8, i8* %p0, i64 9
-  %pa = getelementptr inbounds i8, i8* %p0, i64 10
-  %pb = getelementptr inbounds i8, i8* %p0, i64 11
-  %pc = getelementptr inbounds i8, i8* %p0, i64 12
-  %pd = getelementptr inbounds i8, i8* %p0, i64 13
-  %pe = getelementptr inbounds i8, i8* %p0, i64 14
-  %pf = getelementptr inbounds i8, i8* %p0, i64 15
-  %t0 = load i8, i8* %p0, align 4
-  %t1 = load i8, i8* %p1, align 4
-  %t2 = load i8, i8* %p2, align 4
-  %t3 = load i8, i8* %p3, align 4
-  %t4 = load i8, i8* %p4, align 4
-  %t5 = load i8, i8* %p5, align 4
-  %t6 = load i8, i8* %p6, align 4
-  %t7 = load i8, i8* %p7, align 4
-  %t8 = load i8, i8* %p8, align 4
-  %t9 = load i8, i8* %p9, align 4
-  %ta = load i8, i8* %pa, align 4
-  %tb = load i8, i8* %pb, align 4
-  %tc = load i8, i8* %pc, align 4
-  %td = load i8, i8* %pd, align 4
-  %te = load i8, i8* %pe, align 4
-  %tf = load i8, i8* %pf, align 4
+  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
+  %p4 = getelementptr inbounds i8, ptr %p0, i64 4
+  %p5 = getelementptr inbounds i8, ptr %p0, i64 5
+  %p6 = getelementptr inbounds i8, ptr %p0, i64 6
+  %p7 = getelementptr inbounds i8, ptr %p0, i64 7
+  %p8 = getelementptr inbounds i8, ptr %p0, i64 8
+  %p9 = getelementptr inbounds i8, ptr %p0, i64 9
+  %pa = getelementptr inbounds i8, ptr %p0, i64 10
+  %pb = getelementptr inbounds i8, ptr %p0, i64 11
+  %pc = getelementptr inbounds i8, ptr %p0, i64 12
+  %pd = getelementptr inbounds i8, ptr %p0, i64 13
+  %pe = getelementptr inbounds i8, ptr %p0, i64 14
+  %pf = getelementptr inbounds i8, ptr %p0, i64 15
+  %t0 = load i8, ptr %p0, align 4
+  %t1 = load i8, ptr %p1, align 4
+  %t2 = load i8, ptr %p2, align 4
+  %t3 = load i8, ptr %p3, align 4
+  %t4 = load i8, ptr %p4, align 4
+  %t5 = load i8, ptr %p5, align 4
+  %t6 = load i8, ptr %p6, align 4
+  %t7 = load i8, ptr %p7, align 4
+  %t8 = load i8, ptr %p8, align 4
+  %t9 = load i8, ptr %p9, align 4
+  %ta = load i8, ptr %pa, align 4
+  %tb = load i8, ptr %pb, align 4
+  %tc = load i8, ptr %pc, align 4
+  %td = load i8, ptr %pd, align 4
+  %te = load i8, ptr %pe, align 4
+  %tf = load i8, ptr %pf, align 4
   %m10 = tail call i8 @llvm.umin.i8(i8 %t1, i8 %t0)
   %m32 = tail call i8 @llvm.umin.i8(i8 %t3, i8 %t2)
   %m54 = tail call i8 @llvm.umin.i8(i8 %t5, i8 %t4)

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-smax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-smax.ll
index 5dad970cea011..491658806a5d2 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-smax.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-smax.ll
@@ -9,37 +9,37 @@ declare i32 @llvm.smax.i32(i32, i32)
 
 define i32 @smax_v2i32(i32) {
 ; CHECK-LABEL: @smax_v2i32(
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP2]], i32 [[TMP3]])
 ; CHECK-NEXT:    ret i32 [[TMP4]]
 ;
-  %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
-  %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
+  %2 = load i32, ptr @arr, align 16
+  %3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
   %4 = call i32 @llvm.smax.i32(i32 %2, i32 %3)
   ret i32 %4
 }
 
 define i32 @smax_v4i32(i32) {
 ; SSE-LABEL: @smax_v4i32(
-; SSE-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
-; SSE-NEXT:    [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
-; SSE-NEXT:    [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
+; SSE-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
+; SSE-NEXT:    [[TMP5:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP2]], i32 [[TMP3]])
 ; SSE-NEXT:    [[TMP7:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP6]], i32 [[TMP4]])
 ; SSE-NEXT:    [[TMP8:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP7]], i32 [[TMP5]])
 ; SSE-NEXT:    ret i32 [[TMP8]]
 ;
 ; AVX-LABEL: @smax_v4i32(
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr to <4 x i32>*), align 16
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @arr, align 16
 ; AVX-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]])
 ; AVX-NEXT:    ret i32 [[TMP3]]
 ;
-  %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
-  %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
-  %4 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
-  %5 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
+  %2 = load i32, ptr @arr, align 16
+  %3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
+  %4 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
+  %5 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
   %6 = call i32 @llvm.smax.i32(i32 %2, i32 %3)
   %7 = call i32 @llvm.smax.i32(i32 %6, i32 %4)
   %8 = call i32 @llvm.smax.i32(i32 %7, i32 %5)
@@ -48,14 +48,14 @@ define i32 @smax_v4i32(i32) {
 
 define i32 @smax_v8i32(i32) {
 ; SSE-LABEL: @smax_v8i32(
-; SSE-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
-; SSE-NEXT:    [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
-; SSE-NEXT:    [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
-; SSE-NEXT:    [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
-; SSE-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
-; SSE-NEXT:    [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load i32, ptr @arr, align 16
+; SSE-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
+; SSE-NEXT:    [[TMP5:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
+; SSE-NEXT:    [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
+; SSE-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
+; SSE-NEXT:    [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
+; SSE-NEXT:    [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
 ; SSE-NEXT:    [[TMP10:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP2]], i32 [[TMP3]])
 ; SSE-NEXT:    [[TMP11:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP10]], i32 [[TMP4]])
 ; SSE-NEXT:    [[TMP12:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP11]], i32 [[TMP5]])
@@ -66,18 +66,18 @@ define i32 @smax_v8i32(i32) {
 ; SSE-NEXT:    ret i32 [[TMP16]]
 ;
 ; AVX-LABEL: @smax_v8i32(
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @arr, align 16
 ; AVX-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]])
 ; AVX-NEXT:    ret i32 [[TMP3]]
 ;
-  %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
-  %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
-  %4 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
-  %5 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
-  %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
-  %7 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
-  %8 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
-  %9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
+  %2 = load i32, ptr @arr, align 16
+  %3 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
+  %4 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
+  %5 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
+  %6 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
+  %7 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
+  %8 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
+  %9 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
   %10 = call i32 @llvm.smax.i32(i32 %2,  i32 %3)
   %11 = call i32 @llvm.smax.i32(i32 %10, i32 %4)
   %12 = call i32 @llvm.smax.i32(i32 %11, i32 %5)
@@ -90,26 +90,26 @@ define i32 @smax_v8i32(i32) {
 
 define i32 @smax_v16i32(i32) {
 ; CHECK-LABEL: @smax_v16i32(
-; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr to <16 x i32>*), align 16
+; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @arr, align 16
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> [[TMP2]])
 ; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
-  %2  = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
-  %3  = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
-  %4  = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
-  %5  = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
-  %6  = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
-  %7  = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
-  %8  = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
-  %9  = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
-  %10 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 8), align 16
-  %11 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 9), align 4
-  %12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 10), align 8
-  %13 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 11), align 4
-  %14 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 12), align 16
-  %15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 13), align 4
-  %16 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 14), align 8
-  %17 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 15), align 4
+  %2  = load i32, ptr @arr, align 16
+  %3  = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 1), align 4
+  %4  = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8
+  %5  = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 3), align 4
+  %6  = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 4), align 16
+  %7  = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 5), align 4
+  %8  = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
+  %9  = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
+  %10 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 8), align 16
+  %11 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 9), align 4
+  %12 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 10), align 8
+  %13 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 11), align 4
+  %14 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 12), align 16
+  %15 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 13), align 4
+  %16 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 14), align 8
+  %17 = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 15), align 4
   %18 = call i32 @llvm.smax.i32(i32 %2,  i32 %3)
   %19 = call i32 @llvm.smax.i32(i32 %18, i32 %4)
   %20 = call i32 @llvm.smax.i32(i32 %19, i32 %5)

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/implicitfloat.ll b/llvm/test/Transforms/SLPVectorizer/X86/implicitfloat.ll
index a773670c5a9d5..f5aa953678751 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/implicitfloat.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/implicitfloat.ll
@@ -5,34 +5,34 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-macosx10.8.0"
 
 ; Don't vectorize when noimplicitfloat is used.
-define void @test1(double* %a, double* %b, double* %c) noimplicitfloat { ; <------ noimplicitfloat attribute here!
+define void @test1(ptr %a, ptr %b, ptr %c) noimplicitfloat { ; <------ noimplicitfloat attribute here!
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[I0:%.*]] = load double, double* [[A:%.*]], align 8
-; CHECK-NEXT:    [[I1:%.*]] = load double, double* [[B:%.*]], align 8
+; CHECK-NEXT:    [[I0:%.*]] = load double, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[I1:%.*]] = load double, ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[I0]], [[I1]]
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
-; CHECK-NEXT:    [[I3:%.*]] = load double, double* [[ARRAYIDX3]], align 8
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
-; CHECK-NEXT:    [[I4:%.*]] = load double, double* [[ARRAYIDX4]], align 8
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[A]], i64 1
+; CHECK-NEXT:    [[I3:%.*]] = load double, ptr [[ARRAYIDX3]], align 8
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds double, ptr [[B]], i64 1
+; CHECK-NEXT:    [[I4:%.*]] = load double, ptr [[ARRAYIDX4]], align 8
 ; CHECK-NEXT:    [[MUL5:%.*]] = fmul double [[I3]], [[I4]]
-; CHECK-NEXT:    store double [[MUL]], double* [[C:%.*]], align 8
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[C]], i64 1
-; CHECK-NEXT:    store double [[MUL5]], double* [[ARRAYIDX5]], align 8
+; CHECK-NEXT:    store double [[MUL]], ptr [[C:%.*]], align 8
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[C]], i64 1
+; CHECK-NEXT:    store double [[MUL5]], ptr [[ARRAYIDX5]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %i0 = load double, double* %a, align 8
-  %i1 = load double, double* %b, align 8
+  %i0 = load double, ptr %a, align 8
+  %i1 = load double, ptr %b, align 8
   %mul = fmul double %i0, %i1
-  %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double, double* %arrayidx3, align 8
-  %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double, double* %arrayidx4, align 8
+  %arrayidx3 = getelementptr inbounds double, ptr %a, i64 1
+  %i3 = load double, ptr %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double, ptr %b, i64 1
+  %i4 = load double, ptr %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
-  store double %mul, double* %c, align 8
-  %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
-  store double %mul5, double* %arrayidx5, align 8
+  store double %mul, ptr %c, align 8
+  %arrayidx5 = getelementptr inbounds double, ptr %c, i64 1
+  store double %mul5, ptr %arrayidx5, align 8
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/in-tree-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/in-tree-user.ll
index cff074788d4ca..dcc012c9cd805 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/in-tree-user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/in-tree-user.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-apple-macosx10.7.0"
 @.str = private unnamed_addr constant [6 x i8] c"bingo\00", align 1
 
 ; Uses inside the tree must be scheduled after the corresponding tree bundle.
-define void @in_tree_user(double* nocapture %A, i32 %n) {
+define void @in_tree_user(ptr nocapture %A, i32 %n) {
 ; CHECK-LABEL: @in_tree_user(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[N:%.*]] to double
@@ -17,9 +17,8 @@ define void @in_tree_user(double* nocapture %A, i32 %n) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[SHUFFLE]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], <double 7.000000e+00, double 4.000000e+00>
 ; CHECK-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP5]], <double 5.000000e+00, double 9.000000e+00>
@@ -29,7 +28,7 @@ define void @in_tree_user(double* nocapture %A, i32 %n) {
 ; CHECK-NEXT:    [[CMP11:%.*]] = fcmp ogt double [[TMP7]], [[TMP8]]
 ; CHECK-NEXT:    br i1 [[CMP11]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0))
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 (ptr, ...) @printf(ptr @.str)
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
@@ -37,7 +36,7 @@ define void @in_tree_user(double* nocapture %A, i32 %n) {
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 100
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    store double [[INTREEUSER]], double* [[A]], align 8
+; CHECK-NEXT:    store double [[INTREEUSER]], ptr [[A]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -47,15 +46,15 @@ entry:
 for.body:                                         ; preds = %for.inc, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
   %0 = shl nsw i64 %indvars.iv, 1
-  %arrayidx = getelementptr inbounds double, double* %A, i64 %0
-  %1 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %A, i64 %0
+  %1 = load double, ptr %arrayidx, align 8
   %mul1 = fmul double %conv, %1
   %mul2 = fmul double %mul1, 7.000000e+00
   %add = fadd double %mul2, 5.000000e+00
   %InTreeUser = fadd double %add, %add    ; <------------------ In tree user.
   %2 = or i64 %0, 1
-  %arrayidx6 = getelementptr inbounds double, double* %A, i64 %2
-  %3 = load double, double* %arrayidx6, align 8
+  %arrayidx6 = getelementptr inbounds double, ptr %A, i64 %2
+  %3 = load double, ptr %arrayidx6, align 8
   %mul8 = fmul double %conv, %3
   %mul9 = fmul double %mul8, 4.000000e+00
   %add10 = fadd double %mul9, 9.000000e+00
@@ -63,7 +62,7 @@ for.body:                                         ; preds = %for.inc, %entry
   br i1 %cmp11, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
-  %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0))
+  %call = tail call i32 (ptr, ...) @printf(ptr @.str)
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body, %if.then
@@ -73,9 +72,9 @@ for.inc:                                          ; preds = %for.body, %if.then
   br i1 %exitcond, label %for.end, label %for.body
 
 for.end:                                          ; preds = %for.inc
-  store double %InTreeUser, double* %A, align 8   ; Avoid dead code elimination of the InTreeUser.
+  store double %InTreeUser, ptr %A, align 8   ; Avoid dead code elimination of the InTreeUser.
   ret void
 }
 
-declare i32 @printf(i8* nocapture, ...)
+declare i32 @printf(ptr nocapture, ...)
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll
index 33074f593a6ce..3a35a63c15981 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll
@@ -21,7 +21,7 @@ entry:
   ret i8 %retval.0
 }
 
-define void @bar(i8* noalias nocapture readonly %a, i8* noalias nocapture readonly %b, i8* noalias nocapture readonly %c, i8* noalias nocapture readonly %d, i8* noalias nocapture %e, i32 %w) local_unnamed_addr #1 {
+define void @bar(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c, ptr noalias nocapture readonly %d, ptr noalias nocapture %e, i32 %w) local_unnamed_addr #1 {
 ; CHECK-LABEL: @bar(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[W:%.*]], i32 0
@@ -29,32 +29,27 @@ define void @bar(i8* noalias nocapture readonly %a, i8* noalias nocapture readon
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_0356:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[A_ADDR_0355:%.*]] = phi i8* [ [[A:%.*]], [[ENTRY]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[E_ADDR_0354:%.*]] = phi i8* [ [[E:%.*]], [[ENTRY]] ], [ [[ADD_PTR192:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[D_ADDR_0353:%.*]] = phi i8* [ [[D:%.*]], [[ENTRY]] ], [ [[ADD_PTR191:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[C_ADDR_0352:%.*]] = phi i8* [ [[C:%.*]], [[ENTRY]] ], [ [[ADD_PTR190:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[B_ADDR_0351:%.*]] = phi i8* [ [[B:%.*]], [[ENTRY]] ], [ [[ADD_PTR189:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[C_ADDR_0352]] to <16 x i8>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[D_ADDR_0353]] to <16 x i8>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[TMP3]], align 1
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[A_ADDR_0355]] to <16 x i8>*
-; CHECK-NEXT:    [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[TMP5]], align 1
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8* [[B_ADDR_0351]] to <16 x i8>*
-; CHECK-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* [[TMP7]], align 1
+; CHECK-NEXT:    [[A_ADDR_0355:%.*]] = phi ptr [ [[A:%.*]], [[ENTRY]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[E_ADDR_0354:%.*]] = phi ptr [ [[E:%.*]], [[ENTRY]] ], [ [[ADD_PTR192:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[D_ADDR_0353:%.*]] = phi ptr [ [[D:%.*]], [[ENTRY]] ], [ [[ADD_PTR191:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[C_ADDR_0352:%.*]] = phi ptr [ [[C:%.*]], [[ENTRY]] ], [ [[ADD_PTR190:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[B_ADDR_0351:%.*]] = phi ptr [ [[B:%.*]], [[ENTRY]] ], [ [[ADD_PTR189:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[C_ADDR_0352]], align 1
+; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[D_ADDR_0353]], align 1
+; CHECK-NEXT:    [[TMP6:%.*]] = load <16 x i8>, ptr [[A_ADDR_0355]], align 1
+; CHECK-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr [[B_ADDR_0351]], align 1
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp ult <16 x i8> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = select <16 x i1> [[TMP9]], <16 x i8> [[TMP8]], <16 x i8> [[TMP6]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = zext <16 x i8> [[TMP10]] to <16 x i32>
 ; CHECK-NEXT:    [[TMP12:%.*]] = mul <16 x i32> [[TMP11]], [[SHUFFLE]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = trunc <16 x i32> [[TMP12]] to <16 x i8>
-; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i8* [[E_ADDR_0354]] to <16 x i8>*
-; CHECK-NEXT:    store <16 x i8> [[TMP13]], <16 x i8>* [[TMP14]], align 1
+; CHECK-NEXT:    store <16 x i8> [[TMP13]], ptr [[E_ADDR_0354]], align 1
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_0356]], 1
-; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 16
-; CHECK-NEXT:    [[ADD_PTR189]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 16
-; CHECK-NEXT:    [[ADD_PTR190]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 16
-; CHECK-NEXT:    [[ADD_PTR191]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 16
-; CHECK-NEXT:    [[ADD_PTR192]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 16
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i8, ptr [[A_ADDR_0355]], i64 16
+; CHECK-NEXT:    [[ADD_PTR189]] = getelementptr inbounds i8, ptr [[B_ADDR_0351]], i64 16
+; CHECK-NEXT:    [[ADD_PTR190]] = getelementptr inbounds i8, ptr [[C_ADDR_0352]], i64 16
+; CHECK-NEXT:    [[ADD_PTR191]] = getelementptr inbounds i8, ptr [[D_ADDR_0353]], i64 16
+; CHECK-NEXT:    [[ADD_PTR192]] = getelementptr inbounds i8, ptr [[E_ADDR_0354]], i64 16
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 8
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end:
@@ -65,252 +60,252 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %i.0356 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %a.addr.0355 = phi i8* [ %a, %entry ], [ %add.ptr, %for.body ]
-  %e.addr.0354 = phi i8* [ %e, %entry ], [ %add.ptr192, %for.body ]
-  %d.addr.0353 = phi i8* [ %d, %entry ], [ %add.ptr191, %for.body ]
-  %c.addr.0352 = phi i8* [ %c, %entry ], [ %add.ptr190, %for.body ]
-  %b.addr.0351 = phi i8* [ %b, %entry ], [ %add.ptr189, %for.body ]
-  %0 = load i8, i8* %c.addr.0352, align 1
-  %1 = load i8, i8* %d.addr.0353, align 1
-  %2 = load i8, i8* %a.addr.0355, align 1
-  %3 = load i8, i8* %b.addr.0351, align 1
+  %a.addr.0355 = phi ptr [ %a, %entry ], [ %add.ptr, %for.body ]
+  %e.addr.0354 = phi ptr [ %e, %entry ], [ %add.ptr192, %for.body ]
+  %d.addr.0353 = phi ptr [ %d, %entry ], [ %add.ptr191, %for.body ]
+  %c.addr.0352 = phi ptr [ %c, %entry ], [ %add.ptr190, %for.body ]
+  %b.addr.0351 = phi ptr [ %b, %entry ], [ %add.ptr189, %for.body ]
+  %0 = load i8, ptr %c.addr.0352, align 1
+  %1 = load i8, ptr %d.addr.0353, align 1
+  %2 = load i8, ptr %a.addr.0355, align 1
+  %3 = load i8, ptr %b.addr.0351, align 1
   %cmp.i = icmp ult i8 %0, %1
   %b.a.i.v.v = select i1 %cmp.i, i8 %3, i8 %2
   %b.a.i.v = zext i8 %b.a.i.v.v to i32
   %b.a.i = mul i32 %b.a.i.v, %w
   %retval.0.i = trunc i32 %b.a.i to i8
-  store i8 %retval.0.i, i8* %e.addr.0354, align 1
-  %arrayidx9 = getelementptr inbounds i8, i8* %c.addr.0352, i64 1
-  %4 = load i8, i8* %arrayidx9, align 1
-  %arrayidx11 = getelementptr inbounds i8, i8* %d.addr.0353, i64 1
-  %5 = load i8, i8* %arrayidx11, align 1
-  %arrayidx13 = getelementptr inbounds i8, i8* %a.addr.0355, i64 1
-  %6 = load i8, i8* %arrayidx13, align 1
-  %arrayidx16 = getelementptr inbounds i8, i8* %b.addr.0351, i64 1
-  %7 = load i8, i8* %arrayidx16, align 1
+  store i8 %retval.0.i, ptr %e.addr.0354, align 1
+  %arrayidx9 = getelementptr inbounds i8, ptr %c.addr.0352, i64 1
+  %4 = load i8, ptr %arrayidx9, align 1
+  %arrayidx11 = getelementptr inbounds i8, ptr %d.addr.0353, i64 1
+  %5 = load i8, ptr %arrayidx11, align 1
+  %arrayidx13 = getelementptr inbounds i8, ptr %a.addr.0355, i64 1
+  %6 = load i8, ptr %arrayidx13, align 1
+  %arrayidx16 = getelementptr inbounds i8, ptr %b.addr.0351, i64 1
+  %7 = load i8, ptr %arrayidx16, align 1
   %cmp.i348 = icmp ult i8 %4, %5
   %b.a.i349.v.v = select i1 %cmp.i348, i8 %7, i8 %6
   %b.a.i349.v = zext i8 %b.a.i349.v.v to i32
   %b.a.i349 = mul i32 %b.a.i349.v, %w
   %retval.0.i350 = trunc i32 %b.a.i349 to i8
-  %arrayidx20 = getelementptr inbounds i8, i8* %e.addr.0354, i64 1
-  store i8 %retval.0.i350, i8* %arrayidx20, align 1
-  %arrayidx21 = getelementptr inbounds i8, i8* %c.addr.0352, i64 2
-  %8 = load i8, i8* %arrayidx21, align 1
-  %arrayidx23 = getelementptr inbounds i8, i8* %d.addr.0353, i64 2
-  %9 = load i8, i8* %arrayidx23, align 1
-  %arrayidx25 = getelementptr inbounds i8, i8* %a.addr.0355, i64 2
-  %10 = load i8, i8* %arrayidx25, align 1
-  %arrayidx28 = getelementptr inbounds i8, i8* %b.addr.0351, i64 2
-  %11 = load i8, i8* %arrayidx28, align 1
+  %arrayidx20 = getelementptr inbounds i8, ptr %e.addr.0354, i64 1
+  store i8 %retval.0.i350, ptr %arrayidx20, align 1
+  %arrayidx21 = getelementptr inbounds i8, ptr %c.addr.0352, i64 2
+  %8 = load i8, ptr %arrayidx21, align 1
+  %arrayidx23 = getelementptr inbounds i8, ptr %d.addr.0353, i64 2
+  %9 = load i8, ptr %arrayidx23, align 1
+  %arrayidx25 = getelementptr inbounds i8, ptr %a.addr.0355, i64 2
+  %10 = load i8, ptr %arrayidx25, align 1
+  %arrayidx28 = getelementptr inbounds i8, ptr %b.addr.0351, i64 2
+  %11 = load i8, ptr %arrayidx28, align 1
   %cmp.i345 = icmp ult i8 %8, %9
   %b.a.i346.v.v = select i1 %cmp.i345, i8 %11, i8 %10
   %b.a.i346.v = zext i8 %b.a.i346.v.v to i32
   %b.a.i346 = mul i32 %b.a.i346.v, %w
   %retval.0.i347 = trunc i32 %b.a.i346 to i8
-  %arrayidx32 = getelementptr inbounds i8, i8* %e.addr.0354, i64 2
-  store i8 %retval.0.i347, i8* %arrayidx32, align 1
-  %arrayidx33 = getelementptr inbounds i8, i8* %c.addr.0352, i64 3
-  %12 = load i8, i8* %arrayidx33, align 1
-  %arrayidx35 = getelementptr inbounds i8, i8* %d.addr.0353, i64 3
-  %13 = load i8, i8* %arrayidx35, align 1
-  %arrayidx37 = getelementptr inbounds i8, i8* %a.addr.0355, i64 3
-  %14 = load i8, i8* %arrayidx37, align 1
-  %arrayidx40 = getelementptr inbounds i8, i8* %b.addr.0351, i64 3
-  %15 = load i8, i8* %arrayidx40, align 1
+  %arrayidx32 = getelementptr inbounds i8, ptr %e.addr.0354, i64 2
+  store i8 %retval.0.i347, ptr %arrayidx32, align 1
+  %arrayidx33 = getelementptr inbounds i8, ptr %c.addr.0352, i64 3
+  %12 = load i8, ptr %arrayidx33, align 1
+  %arrayidx35 = getelementptr inbounds i8, ptr %d.addr.0353, i64 3
+  %13 = load i8, ptr %arrayidx35, align 1
+  %arrayidx37 = getelementptr inbounds i8, ptr %a.addr.0355, i64 3
+  %14 = load i8, ptr %arrayidx37, align 1
+  %arrayidx40 = getelementptr inbounds i8, ptr %b.addr.0351, i64 3
+  %15 = load i8, ptr %arrayidx40, align 1
   %cmp.i342 = icmp ult i8 %12, %13
   %b.a.i343.v.v = select i1 %cmp.i342, i8 %15, i8 %14
   %b.a.i343.v = zext i8 %b.a.i343.v.v to i32
   %b.a.i343 = mul i32 %b.a.i343.v, %w
   %retval.0.i344 = trunc i32 %b.a.i343 to i8
-  %arrayidx44 = getelementptr inbounds i8, i8* %e.addr.0354, i64 3
-  store i8 %retval.0.i344, i8* %arrayidx44, align 1
-  %arrayidx45 = getelementptr inbounds i8, i8* %c.addr.0352, i64 4
-  %16 = load i8, i8* %arrayidx45, align 1
-  %arrayidx47 = getelementptr inbounds i8, i8* %d.addr.0353, i64 4
-  %17 = load i8, i8* %arrayidx47, align 1
-  %arrayidx49 = getelementptr inbounds i8, i8* %a.addr.0355, i64 4
-  %18 = load i8, i8* %arrayidx49, align 1
-  %arrayidx52 = getelementptr inbounds i8, i8* %b.addr.0351, i64 4
-  %19 = load i8, i8* %arrayidx52, align 1
+  %arrayidx44 = getelementptr inbounds i8, ptr %e.addr.0354, i64 3
+  store i8 %retval.0.i344, ptr %arrayidx44, align 1
+  %arrayidx45 = getelementptr inbounds i8, ptr %c.addr.0352, i64 4
+  %16 = load i8, ptr %arrayidx45, align 1
+  %arrayidx47 = getelementptr inbounds i8, ptr %d.addr.0353, i64 4
+  %17 = load i8, ptr %arrayidx47, align 1
+  %arrayidx49 = getelementptr inbounds i8, ptr %a.addr.0355, i64 4
+  %18 = load i8, ptr %arrayidx49, align 1
+  %arrayidx52 = getelementptr inbounds i8, ptr %b.addr.0351, i64 4
+  %19 = load i8, ptr %arrayidx52, align 1
   %cmp.i339 = icmp ult i8 %16, %17
   %b.a.i340.v.v = select i1 %cmp.i339, i8 %19, i8 %18
   %b.a.i340.v = zext i8 %b.a.i340.v.v to i32
   %b.a.i340 = mul i32 %b.a.i340.v, %w
   %retval.0.i341 = trunc i32 %b.a.i340 to i8
-  %arrayidx56 = getelementptr inbounds i8, i8* %e.addr.0354, i64 4
-  store i8 %retval.0.i341, i8* %arrayidx56, align 1
-  %arrayidx57 = getelementptr inbounds i8, i8* %c.addr.0352, i64 5
-  %20 = load i8, i8* %arrayidx57, align 1
-  %arrayidx59 = getelementptr inbounds i8, i8* %d.addr.0353, i64 5
-  %21 = load i8, i8* %arrayidx59, align 1
-  %arrayidx61 = getelementptr inbounds i8, i8* %a.addr.0355, i64 5
-  %22 = load i8, i8* %arrayidx61, align 1
-  %arrayidx64 = getelementptr inbounds i8, i8* %b.addr.0351, i64 5
-  %23 = load i8, i8* %arrayidx64, align 1
+  %arrayidx56 = getelementptr inbounds i8, ptr %e.addr.0354, i64 4
+  store i8 %retval.0.i341, ptr %arrayidx56, align 1
+  %arrayidx57 = getelementptr inbounds i8, ptr %c.addr.0352, i64 5
+  %20 = load i8, ptr %arrayidx57, align 1
+  %arrayidx59 = getelementptr inbounds i8, ptr %d.addr.0353, i64 5
+  %21 = load i8, ptr %arrayidx59, align 1
+  %arrayidx61 = getelementptr inbounds i8, ptr %a.addr.0355, i64 5
+  %22 = load i8, ptr %arrayidx61, align 1
+  %arrayidx64 = getelementptr inbounds i8, ptr %b.addr.0351, i64 5
+  %23 = load i8, ptr %arrayidx64, align 1
   %cmp.i336 = icmp ult i8 %20, %21
   %b.a.i337.v.v = select i1 %cmp.i336, i8 %23, i8 %22
   %b.a.i337.v = zext i8 %b.a.i337.v.v to i32
   %b.a.i337 = mul i32 %b.a.i337.v, %w
   %retval.0.i338 = trunc i32 %b.a.i337 to i8
-  %arrayidx68 = getelementptr inbounds i8, i8* %e.addr.0354, i64 5
-  store i8 %retval.0.i338, i8* %arrayidx68, align 1
-  %arrayidx69 = getelementptr inbounds i8, i8* %c.addr.0352, i64 6
-  %24 = load i8, i8* %arrayidx69, align 1
-  %arrayidx71 = getelementptr inbounds i8, i8* %d.addr.0353, i64 6
-  %25 = load i8, i8* %arrayidx71, align 1
-  %arrayidx73 = getelementptr inbounds i8, i8* %a.addr.0355, i64 6
-  %26 = load i8, i8* %arrayidx73, align 1
-  %arrayidx76 = getelementptr inbounds i8, i8* %b.addr.0351, i64 6
-  %27 = load i8, i8* %arrayidx76, align 1
+  %arrayidx68 = getelementptr inbounds i8, ptr %e.addr.0354, i64 5
+  store i8 %retval.0.i338, ptr %arrayidx68, align 1
+  %arrayidx69 = getelementptr inbounds i8, ptr %c.addr.0352, i64 6
+  %24 = load i8, ptr %arrayidx69, align 1
+  %arrayidx71 = getelementptr inbounds i8, ptr %d.addr.0353, i64 6
+  %25 = load i8, ptr %arrayidx71, align 1
+  %arrayidx73 = getelementptr inbounds i8, ptr %a.addr.0355, i64 6
+  %26 = load i8, ptr %arrayidx73, align 1
+  %arrayidx76 = getelementptr inbounds i8, ptr %b.addr.0351, i64 6
+  %27 = load i8, ptr %arrayidx76, align 1
   %cmp.i333 = icmp ult i8 %24, %25
   %b.a.i334.v.v = select i1 %cmp.i333, i8 %27, i8 %26
   %b.a.i334.v = zext i8 %b.a.i334.v.v to i32
   %b.a.i334 = mul i32 %b.a.i334.v, %w
   %retval.0.i335 = trunc i32 %b.a.i334 to i8
-  %arrayidx80 = getelementptr inbounds i8, i8* %e.addr.0354, i64 6
-  store i8 %retval.0.i335, i8* %arrayidx80, align 1
-  %arrayidx81 = getelementptr inbounds i8, i8* %c.addr.0352, i64 7
-  %28 = load i8, i8* %arrayidx81, align 1
-  %arrayidx83 = getelementptr inbounds i8, i8* %d.addr.0353, i64 7
-  %29 = load i8, i8* %arrayidx83, align 1
-  %arrayidx85 = getelementptr inbounds i8, i8* %a.addr.0355, i64 7
-  %30 = load i8, i8* %arrayidx85, align 1
-  %arrayidx88 = getelementptr inbounds i8, i8* %b.addr.0351, i64 7
-  %31 = load i8, i8* %arrayidx88, align 1
+  %arrayidx80 = getelementptr inbounds i8, ptr %e.addr.0354, i64 6
+  store i8 %retval.0.i335, ptr %arrayidx80, align 1
+  %arrayidx81 = getelementptr inbounds i8, ptr %c.addr.0352, i64 7
+  %28 = load i8, ptr %arrayidx81, align 1
+  %arrayidx83 = getelementptr inbounds i8, ptr %d.addr.0353, i64 7
+  %29 = load i8, ptr %arrayidx83, align 1
+  %arrayidx85 = getelementptr inbounds i8, ptr %a.addr.0355, i64 7
+  %30 = load i8, ptr %arrayidx85, align 1
+  %arrayidx88 = getelementptr inbounds i8, ptr %b.addr.0351, i64 7
+  %31 = load i8, ptr %arrayidx88, align 1
   %cmp.i330 = icmp ult i8 %28, %29
   %b.a.i331.v.v = select i1 %cmp.i330, i8 %31, i8 %30
   %b.a.i331.v = zext i8 %b.a.i331.v.v to i32
   %b.a.i331 = mul i32 %b.a.i331.v, %w
   %retval.0.i332 = trunc i32 %b.a.i331 to i8
-  %arrayidx92 = getelementptr inbounds i8, i8* %e.addr.0354, i64 7
-  store i8 %retval.0.i332, i8* %arrayidx92, align 1
-  %arrayidx93 = getelementptr inbounds i8, i8* %c.addr.0352, i64 8
-  %32 = load i8, i8* %arrayidx93, align 1
-  %arrayidx95 = getelementptr inbounds i8, i8* %d.addr.0353, i64 8
-  %33 = load i8, i8* %arrayidx95, align 1
-  %arrayidx97 = getelementptr inbounds i8, i8* %a.addr.0355, i64 8
-  %34 = load i8, i8* %arrayidx97, align 1
-  %arrayidx100 = getelementptr inbounds i8, i8* %b.addr.0351, i64 8
-  %35 = load i8, i8* %arrayidx100, align 1
+  %arrayidx92 = getelementptr inbounds i8, ptr %e.addr.0354, i64 7
+  store i8 %retval.0.i332, ptr %arrayidx92, align 1
+  %arrayidx93 = getelementptr inbounds i8, ptr %c.addr.0352, i64 8
+  %32 = load i8, ptr %arrayidx93, align 1
+  %arrayidx95 = getelementptr inbounds i8, ptr %d.addr.0353, i64 8
+  %33 = load i8, ptr %arrayidx95, align 1
+  %arrayidx97 = getelementptr inbounds i8, ptr %a.addr.0355, i64 8
+  %34 = load i8, ptr %arrayidx97, align 1
+  %arrayidx100 = getelementptr inbounds i8, ptr %b.addr.0351, i64 8
+  %35 = load i8, ptr %arrayidx100, align 1
   %cmp.i327 = icmp ult i8 %32, %33
   %b.a.i328.v.v = select i1 %cmp.i327, i8 %35, i8 %34
   %b.a.i328.v = zext i8 %b.a.i328.v.v to i32
   %b.a.i328 = mul i32 %b.a.i328.v, %w
   %retval.0.i329 = trunc i32 %b.a.i328 to i8
-  %arrayidx104 = getelementptr inbounds i8, i8* %e.addr.0354, i64 8
-  store i8 %retval.0.i329, i8* %arrayidx104, align 1
-  %arrayidx105 = getelementptr inbounds i8, i8* %c.addr.0352, i64 9
-  %36 = load i8, i8* %arrayidx105, align 1
-  %arrayidx107 = getelementptr inbounds i8, i8* %d.addr.0353, i64 9
-  %37 = load i8, i8* %arrayidx107, align 1
-  %arrayidx109 = getelementptr inbounds i8, i8* %a.addr.0355, i64 9
-  %38 = load i8, i8* %arrayidx109, align 1
-  %arrayidx112 = getelementptr inbounds i8, i8* %b.addr.0351, i64 9
-  %39 = load i8, i8* %arrayidx112, align 1
+  %arrayidx104 = getelementptr inbounds i8, ptr %e.addr.0354, i64 8
+  store i8 %retval.0.i329, ptr %arrayidx104, align 1
+  %arrayidx105 = getelementptr inbounds i8, ptr %c.addr.0352, i64 9
+  %36 = load i8, ptr %arrayidx105, align 1
+  %arrayidx107 = getelementptr inbounds i8, ptr %d.addr.0353, i64 9
+  %37 = load i8, ptr %arrayidx107, align 1
+  %arrayidx109 = getelementptr inbounds i8, ptr %a.addr.0355, i64 9
+  %38 = load i8, ptr %arrayidx109, align 1
+  %arrayidx112 = getelementptr inbounds i8, ptr %b.addr.0351, i64 9
+  %39 = load i8, ptr %arrayidx112, align 1
   %cmp.i324 = icmp ult i8 %36, %37
   %b.a.i325.v.v = select i1 %cmp.i324, i8 %39, i8 %38
   %b.a.i325.v = zext i8 %b.a.i325.v.v to i32
   %b.a.i325 = mul i32 %b.a.i325.v, %w
   %retval.0.i326 = trunc i32 %b.a.i325 to i8
-  %arrayidx116 = getelementptr inbounds i8, i8* %e.addr.0354, i64 9
-  store i8 %retval.0.i326, i8* %arrayidx116, align 1
-  %arrayidx117 = getelementptr inbounds i8, i8* %c.addr.0352, i64 10
-  %40 = load i8, i8* %arrayidx117, align 1
-  %arrayidx119 = getelementptr inbounds i8, i8* %d.addr.0353, i64 10
-  %41 = load i8, i8* %arrayidx119, align 1
-  %arrayidx121 = getelementptr inbounds i8, i8* %a.addr.0355, i64 10
-  %42 = load i8, i8* %arrayidx121, align 1
-  %arrayidx124 = getelementptr inbounds i8, i8* %b.addr.0351, i64 10
-  %43 = load i8, i8* %arrayidx124, align 1
+  %arrayidx116 = getelementptr inbounds i8, ptr %e.addr.0354, i64 9
+  store i8 %retval.0.i326, ptr %arrayidx116, align 1
+  %arrayidx117 = getelementptr inbounds i8, ptr %c.addr.0352, i64 10
+  %40 = load i8, ptr %arrayidx117, align 1
+  %arrayidx119 = getelementptr inbounds i8, ptr %d.addr.0353, i64 10
+  %41 = load i8, ptr %arrayidx119, align 1
+  %arrayidx121 = getelementptr inbounds i8, ptr %a.addr.0355, i64 10
+  %42 = load i8, ptr %arrayidx121, align 1
+  %arrayidx124 = getelementptr inbounds i8, ptr %b.addr.0351, i64 10
+  %43 = load i8, ptr %arrayidx124, align 1
   %cmp.i321 = icmp ult i8 %40, %41
   %b.a.i322.v.v = select i1 %cmp.i321, i8 %43, i8 %42
   %b.a.i322.v = zext i8 %b.a.i322.v.v to i32
   %b.a.i322 = mul i32 %b.a.i322.v, %w
   %retval.0.i323 = trunc i32 %b.a.i322 to i8
-  %arrayidx128 = getelementptr inbounds i8, i8* %e.addr.0354, i64 10
-  store i8 %retval.0.i323, i8* %arrayidx128, align 1
-  %arrayidx129 = getelementptr inbounds i8, i8* %c.addr.0352, i64 11
-  %44 = load i8, i8* %arrayidx129, align 1
-  %arrayidx131 = getelementptr inbounds i8, i8* %d.addr.0353, i64 11
-  %45 = load i8, i8* %arrayidx131, align 1
-  %arrayidx133 = getelementptr inbounds i8, i8* %a.addr.0355, i64 11
-  %46 = load i8, i8* %arrayidx133, align 1
-  %arrayidx136 = getelementptr inbounds i8, i8* %b.addr.0351, i64 11
-  %47 = load i8, i8* %arrayidx136, align 1
+  %arrayidx128 = getelementptr inbounds i8, ptr %e.addr.0354, i64 10
+  store i8 %retval.0.i323, ptr %arrayidx128, align 1
+  %arrayidx129 = getelementptr inbounds i8, ptr %c.addr.0352, i64 11
+  %44 = load i8, ptr %arrayidx129, align 1
+  %arrayidx131 = getelementptr inbounds i8, ptr %d.addr.0353, i64 11
+  %45 = load i8, ptr %arrayidx131, align 1
+  %arrayidx133 = getelementptr inbounds i8, ptr %a.addr.0355, i64 11
+  %46 = load i8, ptr %arrayidx133, align 1
+  %arrayidx136 = getelementptr inbounds i8, ptr %b.addr.0351, i64 11
+  %47 = load i8, ptr %arrayidx136, align 1
   %cmp.i318 = icmp ult i8 %44, %45
   %b.a.i319.v.v = select i1 %cmp.i318, i8 %47, i8 %46
   %b.a.i319.v = zext i8 %b.a.i319.v.v to i32
   %b.a.i319 = mul i32 %b.a.i319.v, %w
   %retval.0.i320 = trunc i32 %b.a.i319 to i8
-  %arrayidx140 = getelementptr inbounds i8, i8* %e.addr.0354, i64 11
-  store i8 %retval.0.i320, i8* %arrayidx140, align 1
-  %arrayidx141 = getelementptr inbounds i8, i8* %c.addr.0352, i64 12
-  %48 = load i8, i8* %arrayidx141, align 1
-  %arrayidx143 = getelementptr inbounds i8, i8* %d.addr.0353, i64 12
-  %49 = load i8, i8* %arrayidx143, align 1
-  %arrayidx145 = getelementptr inbounds i8, i8* %a.addr.0355, i64 12
-  %50 = load i8, i8* %arrayidx145, align 1
-  %arrayidx148 = getelementptr inbounds i8, i8* %b.addr.0351, i64 12
-  %51 = load i8, i8* %arrayidx148, align 1
+  %arrayidx140 = getelementptr inbounds i8, ptr %e.addr.0354, i64 11
+  store i8 %retval.0.i320, ptr %arrayidx140, align 1
+  %arrayidx141 = getelementptr inbounds i8, ptr %c.addr.0352, i64 12
+  %48 = load i8, ptr %arrayidx141, align 1
+  %arrayidx143 = getelementptr inbounds i8, ptr %d.addr.0353, i64 12
+  %49 = load i8, ptr %arrayidx143, align 1
+  %arrayidx145 = getelementptr inbounds i8, ptr %a.addr.0355, i64 12
+  %50 = load i8, ptr %arrayidx145, align 1
+  %arrayidx148 = getelementptr inbounds i8, ptr %b.addr.0351, i64 12
+  %51 = load i8, ptr %arrayidx148, align 1
   %cmp.i315 = icmp ult i8 %48, %49
   %b.a.i316.v.v = select i1 %cmp.i315, i8 %51, i8 %50
   %b.a.i316.v = zext i8 %b.a.i316.v.v to i32
   %b.a.i316 = mul i32 %b.a.i316.v, %w
   %retval.0.i317 = trunc i32 %b.a.i316 to i8
-  %arrayidx152 = getelementptr inbounds i8, i8* %e.addr.0354, i64 12
-  store i8 %retval.0.i317, i8* %arrayidx152, align 1
-  %arrayidx153 = getelementptr inbounds i8, i8* %c.addr.0352, i64 13
-  %52 = load i8, i8* %arrayidx153, align 1
-  %arrayidx155 = getelementptr inbounds i8, i8* %d.addr.0353, i64 13
-  %53 = load i8, i8* %arrayidx155, align 1
-  %arrayidx157 = getelementptr inbounds i8, i8* %a.addr.0355, i64 13
-  %54 = load i8, i8* %arrayidx157, align 1
-  %arrayidx160 = getelementptr inbounds i8, i8* %b.addr.0351, i64 13
-  %55 = load i8, i8* %arrayidx160, align 1
+  %arrayidx152 = getelementptr inbounds i8, ptr %e.addr.0354, i64 12
+  store i8 %retval.0.i317, ptr %arrayidx152, align 1
+  %arrayidx153 = getelementptr inbounds i8, ptr %c.addr.0352, i64 13
+  %52 = load i8, ptr %arrayidx153, align 1
+  %arrayidx155 = getelementptr inbounds i8, ptr %d.addr.0353, i64 13
+  %53 = load i8, ptr %arrayidx155, align 1
+  %arrayidx157 = getelementptr inbounds i8, ptr %a.addr.0355, i64 13
+  %54 = load i8, ptr %arrayidx157, align 1
+  %arrayidx160 = getelementptr inbounds i8, ptr %b.addr.0351, i64 13
+  %55 = load i8, ptr %arrayidx160, align 1
   %cmp.i312 = icmp ult i8 %52, %53
   %b.a.i313.v.v = select i1 %cmp.i312, i8 %55, i8 %54
   %b.a.i313.v = zext i8 %b.a.i313.v.v to i32
   %b.a.i313 = mul i32 %b.a.i313.v, %w
   %retval.0.i314 = trunc i32 %b.a.i313 to i8
-  %arrayidx164 = getelementptr inbounds i8, i8* %e.addr.0354, i64 13
-  store i8 %retval.0.i314, i8* %arrayidx164, align 1
-  %arrayidx165 = getelementptr inbounds i8, i8* %c.addr.0352, i64 14
-  %56 = load i8, i8* %arrayidx165, align 1
-  %arrayidx167 = getelementptr inbounds i8, i8* %d.addr.0353, i64 14
-  %57 = load i8, i8* %arrayidx167, align 1
-  %arrayidx169 = getelementptr inbounds i8, i8* %a.addr.0355, i64 14
-  %58 = load i8, i8* %arrayidx169, align 1
-  %arrayidx172 = getelementptr inbounds i8, i8* %b.addr.0351, i64 14
-  %59 = load i8, i8* %arrayidx172, align 1
+  %arrayidx164 = getelementptr inbounds i8, ptr %e.addr.0354, i64 13
+  store i8 %retval.0.i314, ptr %arrayidx164, align 1
+  %arrayidx165 = getelementptr inbounds i8, ptr %c.addr.0352, i64 14
+  %56 = load i8, ptr %arrayidx165, align 1
+  %arrayidx167 = getelementptr inbounds i8, ptr %d.addr.0353, i64 14
+  %57 = load i8, ptr %arrayidx167, align 1
+  %arrayidx169 = getelementptr inbounds i8, ptr %a.addr.0355, i64 14
+  %58 = load i8, ptr %arrayidx169, align 1
+  %arrayidx172 = getelementptr inbounds i8, ptr %b.addr.0351, i64 14
+  %59 = load i8, ptr %arrayidx172, align 1
   %cmp.i309 = icmp ult i8 %56, %57
   %b.a.i310.v.v = select i1 %cmp.i309, i8 %59, i8 %58
   %b.a.i310.v = zext i8 %b.a.i310.v.v to i32
   %b.a.i310 = mul i32 %b.a.i310.v, %w
   %retval.0.i311 = trunc i32 %b.a.i310 to i8
-  %arrayidx176 = getelementptr inbounds i8, i8* %e.addr.0354, i64 14
-  store i8 %retval.0.i311, i8* %arrayidx176, align 1
-  %arrayidx177 = getelementptr inbounds i8, i8* %c.addr.0352, i64 15
-  %60 = load i8, i8* %arrayidx177, align 1
-  %arrayidx179 = getelementptr inbounds i8, i8* %d.addr.0353, i64 15
-  %61 = load i8, i8* %arrayidx179, align 1
-  %arrayidx181 = getelementptr inbounds i8, i8* %a.addr.0355, i64 15
-  %62 = load i8, i8* %arrayidx181, align 1
-  %arrayidx184 = getelementptr inbounds i8, i8* %b.addr.0351, i64 15
-  %63 = load i8, i8* %arrayidx184, align 1
+  %arrayidx176 = getelementptr inbounds i8, ptr %e.addr.0354, i64 14
+  store i8 %retval.0.i311, ptr %arrayidx176, align 1
+  %arrayidx177 = getelementptr inbounds i8, ptr %c.addr.0352, i64 15
+  %60 = load i8, ptr %arrayidx177, align 1
+  %arrayidx179 = getelementptr inbounds i8, ptr %d.addr.0353, i64 15
+  %61 = load i8, ptr %arrayidx179, align 1
+  %arrayidx181 = getelementptr inbounds i8, ptr %a.addr.0355, i64 15
+  %62 = load i8, ptr %arrayidx181, align 1
+  %arrayidx184 = getelementptr inbounds i8, ptr %b.addr.0351, i64 15
+  %63 = load i8, ptr %arrayidx184, align 1
   %cmp.i306 = icmp ult i8 %60, %61
   %b.a.i307.v.v = select i1 %cmp.i306, i8 %63, i8 %62
   %b.a.i307.v = zext i8 %b.a.i307.v.v to i32
   %b.a.i307 = mul i32 %b.a.i307.v, %w
   %retval.0.i308 = trunc i32 %b.a.i307 to i8
-  %arrayidx188 = getelementptr inbounds i8, i8* %e.addr.0354, i64 15
-  store i8 %retval.0.i308, i8* %arrayidx188, align 1
+  %arrayidx188 = getelementptr inbounds i8, ptr %e.addr.0354, i64 15
+  store i8 %retval.0.i308, ptr %arrayidx188, align 1
   %inc = add nuw nsw i32 %i.0356, 1
-  %add.ptr = getelementptr inbounds i8, i8* %a.addr.0355, i64 16
-  %add.ptr189 = getelementptr inbounds i8, i8* %b.addr.0351, i64 16
-  %add.ptr190 = getelementptr inbounds i8, i8* %c.addr.0352, i64 16
-  %add.ptr191 = getelementptr inbounds i8, i8* %d.addr.0353, i64 16
-  %add.ptr192 = getelementptr inbounds i8, i8* %e.addr.0354, i64 16
+  %add.ptr = getelementptr inbounds i8, ptr %a.addr.0355, i64 16
+  %add.ptr189 = getelementptr inbounds i8, ptr %b.addr.0351, i64 16
+  %add.ptr190 = getelementptr inbounds i8, ptr %c.addr.0352, i64 16
+  %add.ptr191 = getelementptr inbounds i8, ptr %d.addr.0353, i64 16
+  %add.ptr192 = getelementptr inbounds i8, ptr %e.addr.0354, i64 16
   %exitcond = icmp eq i32 %inc, 8
   br i1 %exitcond, label %for.end, label %for.body
 
@@ -324,54 +319,54 @@ for.end:                                          ; preds = %for.body
 define i32 @foo1() local_unnamed_addr #0 {
 ; SSE-LABEL: @foo1(
 ; SSE-NEXT:  entry:
-; SSE-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([64 x i32]* @ib to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr @ib, align 16
 ; SSE-NEXT:    [[TMP1:%.*]] = xor <4 x i32> [[TMP0]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; SSE-NEXT:    store <4 x i32> [[TMP1]], <4 x i32>* bitcast ([64 x i32]* @ia to <4 x i32>*), align 16
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 4) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x i32> [[TMP1]], ptr @ia, align 16
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 4), align 16
 ; SSE-NEXT:    [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; SSE-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 4) to <4 x i32>*), align 16
-; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 8) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x i32> [[TMP3]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 4), align 16
+; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 8), align 16
 ; SSE-NEXT:    [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; SSE-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 8) to <4 x i32>*), align 16
-; SSE-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 12) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x i32> [[TMP5]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 8), align 16
+; SSE-NEXT:    [[TMP6:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 12), align 16
 ; SSE-NEXT:    [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; SSE-NEXT:    store <4 x i32> [[TMP7]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 12) to <4 x i32>*), align 16
-; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 16) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x i32> [[TMP7]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 12), align 16
+; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 16), align 16
 ; SSE-NEXT:    [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; SSE-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 16) to <4 x i32>*), align 16
-; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 20) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 16), align 16
+; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 20), align 16
 ; SSE-NEXT:    [[TMP11:%.*]] = xor <4 x i32> [[TMP10]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; SSE-NEXT:    store <4 x i32> [[TMP11]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 20) to <4 x i32>*), align 16
-; SSE-NEXT:    [[TMP12:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 24) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x i32> [[TMP11]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 20), align 16
+; SSE-NEXT:    [[TMP12:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 24), align 16
 ; SSE-NEXT:    [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; SSE-NEXT:    store <4 x i32> [[TMP13]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 24) to <4 x i32>*), align 16
-; SSE-NEXT:    [[TMP14:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 28) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x i32> [[TMP13]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 24), align 16
+; SSE-NEXT:    [[TMP14:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 28), align 16
 ; SSE-NEXT:    [[TMP15:%.*]] = xor <4 x i32> [[TMP14]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; SSE-NEXT:    store <4 x i32> [[TMP15]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 28) to <4 x i32>*), align 16
-; SSE-NEXT:    [[TMP16:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 32) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x i32> [[TMP15]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 28), align 16
+; SSE-NEXT:    [[TMP16:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 32), align 16
 ; SSE-NEXT:    [[TMP17:%.*]] = xor <4 x i32> [[TMP16]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; SSE-NEXT:    store <4 x i32> [[TMP17]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 32) to <4 x i32>*), align 16
-; SSE-NEXT:    [[TMP18:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 36) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x i32> [[TMP17]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 32), align 16
+; SSE-NEXT:    [[TMP18:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 36), align 16
 ; SSE-NEXT:    [[TMP19:%.*]] = xor <4 x i32> [[TMP18]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; SSE-NEXT:    store <4 x i32> [[TMP19]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 36) to <4 x i32>*), align 16
-; SSE-NEXT:    [[TMP20:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 40) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x i32> [[TMP19]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 36), align 16
+; SSE-NEXT:    [[TMP20:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 40), align 16
 ; SSE-NEXT:    [[TMP21:%.*]] = xor <4 x i32> [[TMP20]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; SSE-NEXT:    store <4 x i32> [[TMP21]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 40) to <4 x i32>*), align 16
-; SSE-NEXT:    [[TMP22:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 44) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x i32> [[TMP21]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 40), align 16
+; SSE-NEXT:    [[TMP22:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 44), align 16
 ; SSE-NEXT:    [[TMP23:%.*]] = xor <4 x i32> [[TMP22]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; SSE-NEXT:    store <4 x i32> [[TMP23]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 44) to <4 x i32>*), align 16
-; SSE-NEXT:    [[TMP24:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 48) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x i32> [[TMP23]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 44), align 16
+; SSE-NEXT:    [[TMP24:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 48), align 16
 ; SSE-NEXT:    [[TMP25:%.*]] = xor <4 x i32> [[TMP24]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; SSE-NEXT:    store <4 x i32> [[TMP25]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 48) to <4 x i32>*), align 16
-; SSE-NEXT:    [[TMP26:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 52) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x i32> [[TMP25]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 48), align 16
+; SSE-NEXT:    [[TMP26:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 52), align 16
 ; SSE-NEXT:    [[TMP27:%.*]] = xor <4 x i32> [[TMP26]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; SSE-NEXT:    store <4 x i32> [[TMP27]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 52) to <4 x i32>*), align 16
-; SSE-NEXT:    [[TMP28:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 56) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x i32> [[TMP27]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 52), align 16
+; SSE-NEXT:    [[TMP28:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 56), align 16
 ; SSE-NEXT:    [[TMP29:%.*]] = xor <4 x i32> [[TMP28]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; SSE-NEXT:    store <4 x i32> [[TMP29]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 56) to <4 x i32>*), align 16
-; SSE-NEXT:    [[TMP30:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 60) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x i32> [[TMP29]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 56), align 16
+; SSE-NEXT:    [[TMP30:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 60), align 16
 ; SSE-NEXT:    [[TMP31:%.*]] = xor <4 x i32> [[TMP30]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; SSE-NEXT:    store <4 x i32> [[TMP31]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 60) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x i32> [[TMP31]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 60), align 16
 ; SSE-NEXT:    br label [[FOR_BODY5:%.*]]
 ; SSE:       for.cond3:
 ; SSE-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV:%.*]], 1
@@ -379,10 +374,10 @@ define i32 @foo1() local_unnamed_addr #0 {
 ; SSE-NEXT:    br i1 [[CMP4]], label [[FOR_BODY5]], label [[FOR_END14:%.*]]
 ; SSE:       for.body5:
 ; SSE-NEXT:    [[INDVARS_IV]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT]], [[FOR_COND3:%.*]] ]
-; SSE-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ia, i64 0, i64 [[INDVARS_IV]]
-; SSE-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
-; SSE-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ib, i64 0, i64 [[INDVARS_IV]]
-; SSE-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4
+; SSE-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds [64 x i32], ptr @ia, i64 0, i64 [[INDVARS_IV]]
+; SSE-NEXT:    [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
+; SSE-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds [64 x i32], ptr @ib, i64 0, i64 [[INDVARS_IV]]
+; SSE-NEXT:    [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX9]], align 4
 ; SSE-NEXT:    [[NEG10:%.*]] = xor i32 [[TMP33]], -1
 ; SSE-NEXT:    [[CMP11:%.*]] = icmp eq i32 [[TMP32]], [[NEG10]]
 ; SSE-NEXT:    br i1 [[CMP11]], label [[FOR_COND3]], label [[IF_THEN:%.*]]
@@ -394,18 +389,18 @@ define i32 @foo1() local_unnamed_addr #0 {
 ;
 ; AVX512-LABEL: @foo1(
 ; AVX512-NEXT:  entry:
-; AVX512-NEXT:    [[TMP0:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([64 x i32]* @ib to <16 x i32>*), align 16
+; AVX512-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr @ib, align 16
 ; AVX512-NEXT:    [[TMP1:%.*]] = xor <16 x i32> [[TMP0]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
-; AVX512-NEXT:    store <16 x i32> [[TMP1]], <16 x i32>* bitcast ([64 x i32]* @ia to <16 x i32>*), align 16
-; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 16) to <16 x i32>*), align 16
+; AVX512-NEXT:    store <16 x i32> [[TMP1]], ptr @ia, align 16
+; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 16), align 16
 ; AVX512-NEXT:    [[TMP3:%.*]] = xor <16 x i32> [[TMP2]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
-; AVX512-NEXT:    store <16 x i32> [[TMP3]], <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 16) to <16 x i32>*), align 16
-; AVX512-NEXT:    [[TMP4:%.*]] = load <16 x i32>, <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 32) to <16 x i32>*), align 16
+; AVX512-NEXT:    store <16 x i32> [[TMP3]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 16), align 16
+; AVX512-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 32), align 16
 ; AVX512-NEXT:    [[TMP5:%.*]] = xor <16 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
-; AVX512-NEXT:    store <16 x i32> [[TMP5]], <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 32) to <16 x i32>*), align 16
-; AVX512-NEXT:    [[TMP6:%.*]] = load <16 x i32>, <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 48) to <16 x i32>*), align 16
+; AVX512-NEXT:    store <16 x i32> [[TMP5]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 32), align 16
+; AVX512-NEXT:    [[TMP6:%.*]] = load <16 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 48), align 16
 ; AVX512-NEXT:    [[TMP7:%.*]] = xor <16 x i32> [[TMP6]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
-; AVX512-NEXT:    store <16 x i32> [[TMP7]], <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 48) to <16 x i32>*), align 16
+; AVX512-NEXT:    store <16 x i32> [[TMP7]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 48), align 16
 ; AVX512-NEXT:    br label [[FOR_BODY5:%.*]]
 ; AVX512:       for.cond3:
 ; AVX512-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV:%.*]], 1
@@ -413,10 +408,10 @@ define i32 @foo1() local_unnamed_addr #0 {
 ; AVX512-NEXT:    br i1 [[CMP4]], label [[FOR_BODY5]], label [[FOR_END14:%.*]]
 ; AVX512:       for.body5:
 ; AVX512-NEXT:    [[INDVARS_IV]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT]], [[FOR_COND3:%.*]] ]
-; AVX512-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ia, i64 0, i64 [[INDVARS_IV]]
-; AVX512-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
-; AVX512-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ib, i64 0, i64 [[INDVARS_IV]]
-; AVX512-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4
+; AVX512-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds [64 x i32], ptr @ia, i64 0, i64 [[INDVARS_IV]]
+; AVX512-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
+; AVX512-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds [64 x i32], ptr @ib, i64 0, i64 [[INDVARS_IV]]
+; AVX512-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX9]], align 4
 ; AVX512-NEXT:    [[NEG10:%.*]] = xor i32 [[TMP9]], -1
 ; AVX512-NEXT:    [[CMP11:%.*]] = icmp eq i32 [[TMP8]], [[NEG10]]
 ; AVX512-NEXT:    br i1 [[CMP11]], label [[FOR_COND3]], label [[IF_THEN:%.*]]
@@ -427,198 +422,198 @@ define i32 @foo1() local_unnamed_addr #0 {
 ; AVX512-NEXT:    ret i32 0
 ;
 entry:
-  %0 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 0), align 16
+  %0 = load i32, ptr @ib, align 16
   %neg = xor i32 %0, -1
-  store i32 %neg, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 0), align 16
-  %1 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 1), align 4
+  store i32 %neg, ptr @ia, align 16
+  %1 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 1), align 4
   %neg.1 = xor i32 %1, -1
-  store i32 %neg.1, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 1), align 4
-  %2 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 2), align 8
+  store i32 %neg.1, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 1), align 4
+  %2 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 2), align 8
   %neg.2 = xor i32 %2, -1
-  store i32 %neg.2, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 2), align 8
-  %3 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 3), align 4
+  store i32 %neg.2, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 2), align 8
+  %3 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 3), align 4
   %neg.3 = xor i32 %3, -1
-  store i32 %neg.3, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 3), align 4
-  %4 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 4), align 16
+  store i32 %neg.3, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 3), align 4
+  %4 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 4), align 16
   %neg.4 = xor i32 %4, -1
-  store i32 %neg.4, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 4), align 16
-  %5 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 5), align 4
+  store i32 %neg.4, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 4), align 16
+  %5 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 5), align 4
   %neg.5 = xor i32 %5, -1
-  store i32 %neg.5, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 5), align 4
-  %6 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 6), align 8
+  store i32 %neg.5, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 5), align 4
+  %6 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 6), align 8
   %neg.6 = xor i32 %6, -1
-  store i32 %neg.6, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 6), align 8
-  %7 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 7), align 4
+  store i32 %neg.6, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 6), align 8
+  %7 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 7), align 4
   %neg.7 = xor i32 %7, -1
-  store i32 %neg.7, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 7), align 4
-  %8 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 8), align 16
+  store i32 %neg.7, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 7), align 4
+  %8 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 8), align 16
   %neg.8 = xor i32 %8, -1
-  store i32 %neg.8, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 8), align 16
-  %9 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 9), align 4
+  store i32 %neg.8, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 8), align 16
+  %9 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 9), align 4
   %neg.9 = xor i32 %9, -1
-  store i32 %neg.9, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 9), align 4
-  %10 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 10), align 8
+  store i32 %neg.9, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 9), align 4
+  %10 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 10), align 8
   %neg.10 = xor i32 %10, -1
-  store i32 %neg.10, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 10), align 8
-  %11 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 11), align 4
+  store i32 %neg.10, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 10), align 8
+  %11 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 11), align 4
   %neg.11 = xor i32 %11, -1
-  store i32 %neg.11, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 11), align 4
-  %12 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 12), align 16
+  store i32 %neg.11, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 11), align 4
+  %12 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 12), align 16
   %neg.12 = xor i32 %12, -1
-  store i32 %neg.12, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 12), align 16
-  %13 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 13), align 4
+  store i32 %neg.12, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 12), align 16
+  %13 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 13), align 4
   %neg.13 = xor i32 %13, -1
-  store i32 %neg.13, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 13), align 4
-  %14 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 14), align 8
+  store i32 %neg.13, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 13), align 4
+  %14 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 14), align 8
   %neg.14 = xor i32 %14, -1
-  store i32 %neg.14, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 14), align 8
-  %15 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 15), align 4
+  store i32 %neg.14, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 14), align 8
+  %15 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 15), align 4
   %neg.15 = xor i32 %15, -1
-  store i32 %neg.15, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 15), align 4
-  %16 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 16), align 16
+  store i32 %neg.15, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 15), align 4
+  %16 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 16), align 16
   %neg.16 = xor i32 %16, -1
-  store i32 %neg.16, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 16), align 16
-  %17 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 17), align 4
+  store i32 %neg.16, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 16), align 16
+  %17 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 17), align 4
   %neg.17 = xor i32 %17, -1
-  store i32 %neg.17, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 17), align 4
-  %18 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 18), align 8
+  store i32 %neg.17, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 17), align 4
+  %18 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 18), align 8
   %neg.18 = xor i32 %18, -1
-  store i32 %neg.18, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 18), align 8
-  %19 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 19), align 4
+  store i32 %neg.18, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 18), align 8
+  %19 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 19), align 4
   %neg.19 = xor i32 %19, -1
-  store i32 %neg.19, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 19), align 4
-  %20 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 20), align 16
+  store i32 %neg.19, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 19), align 4
+  %20 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 20), align 16
   %neg.20 = xor i32 %20, -1
-  store i32 %neg.20, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 20), align 16
-  %21 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 21), align 4
+  store i32 %neg.20, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 20), align 16
+  %21 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 21), align 4
   %neg.21 = xor i32 %21, -1
-  store i32 %neg.21, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 21), align 4
-  %22 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 22), align 8
+  store i32 %neg.21, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 21), align 4
+  %22 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 22), align 8
   %neg.22 = xor i32 %22, -1
-  store i32 %neg.22, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 22), align 8
-  %23 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 23), align 4
+  store i32 %neg.22, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 22), align 8
+  %23 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 23), align 4
   %neg.23 = xor i32 %23, -1
-  store i32 %neg.23, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 23), align 4
-  %24 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 24), align 16
+  store i32 %neg.23, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 23), align 4
+  %24 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 24), align 16
   %neg.24 = xor i32 %24, -1
-  store i32 %neg.24, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 24), align 16
-  %25 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 25), align 4
+  store i32 %neg.24, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 24), align 16
+  %25 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 25), align 4
   %neg.25 = xor i32 %25, -1
-  store i32 %neg.25, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 25), align 4
-  %26 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 26), align 8
+  store i32 %neg.25, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 25), align 4
+  %26 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 26), align 8
   %neg.26 = xor i32 %26, -1
-  store i32 %neg.26, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 26), align 8
-  %27 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 27), align 4
+  store i32 %neg.26, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 26), align 8
+  %27 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 27), align 4
   %neg.27 = xor i32 %27, -1
-  store i32 %neg.27, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 27), align 4
-  %28 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 28), align 16
+  store i32 %neg.27, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 27), align 4
+  %28 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 28), align 16
   %neg.28 = xor i32 %28, -1
-  store i32 %neg.28, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 28), align 16
-  %29 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 29), align 4
+  store i32 %neg.28, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 28), align 16
+  %29 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 29), align 4
   %neg.29 = xor i32 %29, -1
-  store i32 %neg.29, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 29), align 4
-  %30 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 30), align 8
+  store i32 %neg.29, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 29), align 4
+  %30 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 30), align 8
   %neg.30 = xor i32 %30, -1
-  store i32 %neg.30, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 30), align 8
-  %31 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 31), align 4
+  store i32 %neg.30, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 30), align 8
+  %31 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 31), align 4
   %neg.31 = xor i32 %31, -1
-  store i32 %neg.31, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 31), align 4
-  %32 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 32), align 16
+  store i32 %neg.31, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 31), align 4
+  %32 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 32), align 16
   %neg.32 = xor i32 %32, -1
-  store i32 %neg.32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 32), align 16
-  %33 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 33), align 4
+  store i32 %neg.32, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 32), align 16
+  %33 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 33), align 4
   %neg.33 = xor i32 %33, -1
-  store i32 %neg.33, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 33), align 4
-  %34 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 34), align 8
+  store i32 %neg.33, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 33), align 4
+  %34 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 34), align 8
   %neg.34 = xor i32 %34, -1
-  store i32 %neg.34, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 34), align 8
-  %35 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 35), align 4
+  store i32 %neg.34, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 34), align 8
+  %35 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 35), align 4
   %neg.35 = xor i32 %35, -1
-  store i32 %neg.35, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 35), align 4
-  %36 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 36), align 16
+  store i32 %neg.35, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 35), align 4
+  %36 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 36), align 16
   %neg.36 = xor i32 %36, -1
-  store i32 %neg.36, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 36), align 16
-  %37 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 37), align 4
+  store i32 %neg.36, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 36), align 16
+  %37 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 37), align 4
   %neg.37 = xor i32 %37, -1
-  store i32 %neg.37, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 37), align 4
-  %38 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 38), align 8
+  store i32 %neg.37, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 37), align 4
+  %38 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 38), align 8
   %neg.38 = xor i32 %38, -1
-  store i32 %neg.38, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 38), align 8
-  %39 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 39), align 4
+  store i32 %neg.38, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 38), align 8
+  %39 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 39), align 4
   %neg.39 = xor i32 %39, -1
-  store i32 %neg.39, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 39), align 4
-  %40 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 40), align 16
+  store i32 %neg.39, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 39), align 4
+  %40 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 40), align 16
   %neg.40 = xor i32 %40, -1
-  store i32 %neg.40, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 40), align 16
-  %41 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 41), align 4
+  store i32 %neg.40, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 40), align 16
+  %41 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 41), align 4
   %neg.41 = xor i32 %41, -1
-  store i32 %neg.41, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 41), align 4
-  %42 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 42), align 8
+  store i32 %neg.41, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 41), align 4
+  %42 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 42), align 8
   %neg.42 = xor i32 %42, -1
-  store i32 %neg.42, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 42), align 8
-  %43 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 43), align 4
+  store i32 %neg.42, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 42), align 8
+  %43 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 43), align 4
   %neg.43 = xor i32 %43, -1
-  store i32 %neg.43, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 43), align 4
-  %44 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 44), align 16
+  store i32 %neg.43, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 43), align 4
+  %44 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 44), align 16
   %neg.44 = xor i32 %44, -1
-  store i32 %neg.44, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 44), align 16
-  %45 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 45), align 4
+  store i32 %neg.44, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 44), align 16
+  %45 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 45), align 4
   %neg.45 = xor i32 %45, -1
-  store i32 %neg.45, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 45), align 4
-  %46 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 46), align 8
+  store i32 %neg.45, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 45), align 4
+  %46 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 46), align 8
   %neg.46 = xor i32 %46, -1
-  store i32 %neg.46, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 46), align 8
-  %47 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 47), align 4
+  store i32 %neg.46, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 46), align 8
+  %47 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 47), align 4
   %neg.47 = xor i32 %47, -1
-  store i32 %neg.47, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 47), align 4
-  %48 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 48), align 16
+  store i32 %neg.47, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 47), align 4
+  %48 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 48), align 16
   %neg.48 = xor i32 %48, -1
-  store i32 %neg.48, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 48), align 16
-  %49 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 49), align 4
+  store i32 %neg.48, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 48), align 16
+  %49 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 49), align 4
   %neg.49 = xor i32 %49, -1
-  store i32 %neg.49, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 49), align 4
-  %50 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 50), align 8
+  store i32 %neg.49, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 49), align 4
+  %50 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 50), align 8
   %neg.50 = xor i32 %50, -1
-  store i32 %neg.50, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 50), align 8
-  %51 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 51), align 4
+  store i32 %neg.50, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 50), align 8
+  %51 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 51), align 4
   %neg.51 = xor i32 %51, -1
-  store i32 %neg.51, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 51), align 4
-  %52 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 52), align 16
+  store i32 %neg.51, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 51), align 4
+  %52 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 52), align 16
   %neg.52 = xor i32 %52, -1
-  store i32 %neg.52, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 52), align 16
-  %53 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 53), align 4
+  store i32 %neg.52, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 52), align 16
+  %53 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 53), align 4
   %neg.53 = xor i32 %53, -1
-  store i32 %neg.53, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 53), align 4
-  %54 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 54), align 8
+  store i32 %neg.53, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 53), align 4
+  %54 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 54), align 8
   %neg.54 = xor i32 %54, -1
-  store i32 %neg.54, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 54), align 8
-  %55 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 55), align 4
+  store i32 %neg.54, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 54), align 8
+  %55 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 55), align 4
   %neg.55 = xor i32 %55, -1
-  store i32 %neg.55, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 55), align 4
-  %56 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 56), align 16
+  store i32 %neg.55, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 55), align 4
+  %56 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 56), align 16
   %neg.56 = xor i32 %56, -1
-  store i32 %neg.56, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 56), align 16
-  %57 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 57), align 4
+  store i32 %neg.56, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 56), align 16
+  %57 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 57), align 4
   %neg.57 = xor i32 %57, -1
-  store i32 %neg.57, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 57), align 4
-  %58 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 58), align 8
+  store i32 %neg.57, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 57), align 4
+  %58 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 58), align 8
   %neg.58 = xor i32 %58, -1
-  store i32 %neg.58, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 58), align 8
-  %59 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 59), align 4
+  store i32 %neg.58, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 58), align 8
+  %59 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 59), align 4
   %neg.59 = xor i32 %59, -1
-  store i32 %neg.59, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 59), align 4
-  %60 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 60), align 16
+  store i32 %neg.59, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 59), align 4
+  %60 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 60), align 16
   %neg.60 = xor i32 %60, -1
-  store i32 %neg.60, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 60), align 16
-  %61 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 61), align 4
+  store i32 %neg.60, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 60), align 16
+  %61 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 61), align 4
   %neg.61 = xor i32 %61, -1
-  store i32 %neg.61, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 61), align 4
-  %62 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 62), align 8
+  store i32 %neg.61, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 61), align 4
+  %62 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 62), align 8
   %neg.62 = xor i32 %62, -1
-  store i32 %neg.62, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 62), align 8
-  %63 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 63), align 4
+  store i32 %neg.62, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 62), align 8
+  %63 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 63), align 4
   %neg.63 = xor i32 %63, -1
-  store i32 %neg.63, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 63), align 4
+  store i32 %neg.63, ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 63), align 4
   br label %for.body5
 
 for.cond3:                                        ; preds = %for.body5
@@ -628,10 +623,10 @@ for.cond3:                                        ; preds = %for.body5
 
 for.body5:                                        ; preds = %entry, %for.cond3
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.cond3 ]
-  %arrayidx7 = getelementptr inbounds [64 x i32], [64 x i32]* @ia, i64 0, i64 %indvars.iv
-  %64 = load i32, i32* %arrayidx7, align 4
-  %arrayidx9 = getelementptr inbounds [64 x i32], [64 x i32]* @ib, i64 0, i64 %indvars.iv
-  %65 = load i32, i32* %arrayidx9, align 4
+  %arrayidx7 = getelementptr inbounds [64 x i32], ptr @ia, i64 0, i64 %indvars.iv
+  %64 = load i32, ptr %arrayidx7, align 4
+  %arrayidx9 = getelementptr inbounds [64 x i32], ptr @ib, i64 0, i64 %indvars.iv
+  %65 = load i32, ptr %arrayidx9, align 4
   %neg10 = xor i32 %65, -1
   %cmp11 = icmp eq i32 %64, %neg10
   br i1 %cmp11, label %for.cond3, label %if.then

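Aside: every hunk in foo1 above applies the same mechanical rewrite. A typed
pointer operand such as i32* becomes the single opaque type ptr, and the
constant-expression bitcasts that reinterpreted the [64 x i32] globals @ia and
@ib as vector pointers fold away entirely, since an opaque ptr carries no
pointee type to convert between. A minimal standalone sketch of the rewritten
form (the global @g and function @load_g are hypothetical, not part of this
commit):

  @g = global [64 x i32] zeroinitializer, align 16

  define <4 x i32> @load_g() {
    ; With opaque pointers the vector load takes @g directly; under typed
    ; pointers the same operand had to be spelled
    ;   <4 x i32>* bitcast ([64 x i32]* @g to <4 x i32>*)
    %v = load <4 x i32>, ptr @g, align 16
    ret <4 x i32> %v
  }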
diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-crash-index.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-crash-index.ll
index 0b2d61926de54..9d7ba5a8b3033 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-crash-index.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-crash-index.ll
@@ -3,19 +3,19 @@
 
 ; These all crashed before the patch.
 
-define  <2 x i8> @negative_index(<2 x i8> %aux_vec, i8* %in) {
+define  <2 x i8> @negative_index(<2 x i8> %aux_vec, ptr %in) {
 ; CHECK-LABEL: @negative_index(
-; CHECK-NEXT:    [[IN0:%.*]] = load i8, i8* [[IN:%.*]], align 4
-; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds i8, i8* [[IN]], i64 1
-; CHECK-NEXT:    [[IN1:%.*]] = load i8, i8* [[G1]], align 4
+; CHECK-NEXT:    [[IN0:%.*]] = load i8, ptr [[IN:%.*]], align 4
+; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds i8, ptr [[IN]], i64 1
+; CHECK-NEXT:    [[IN1:%.*]] = load i8, ptr [[G1]], align 4
 ; CHECK-NEXT:    [[V0:%.*]] = insertelement <2 x i8> [[AUX_VEC:%.*]], i8 [[IN0]], i8 -1
 ; CHECK-NEXT:    [[V1:%.*]] = insertelement <2 x i8> [[V0]], i8 [[IN1]], i64 1
 ; CHECK-NEXT:    [[OUT:%.*]] = add <2 x i8> [[V1]], [[V1]]
 ; CHECK-NEXT:    ret <2 x i8> [[OUT]]
 ;
-  %in0 = load i8, i8* %in, align 4
-  %g1 = getelementptr inbounds i8, i8* %in, i64 1
-  %in1 = load i8, i8* %g1, align 4
+  %in0 = load i8, ptr %in, align 4
+  %g1 = getelementptr inbounds i8, ptr %in, i64 1
+  %in1 = load i8, ptr %g1, align 4
 
   %v0 = insertelement <2 x i8> %aux_vec, i8 %in0, i8 -1
   %v1 = insertelement <2 x i8> %v0, i8 %in1, i64 1
@@ -24,19 +24,19 @@ define  <2 x i8> @negative_index(<2 x i8> %aux_vec, i8* %in) {
   ret <2 x i8> %out
 }
 
-define  <2 x i8> @exceed_index(<2 x i8> %aux_vec, i8* %in) {
+define  <2 x i8> @exceed_index(<2 x i8> %aux_vec, ptr %in) {
 ; CHECK-LABEL: @exceed_index(
-; CHECK-NEXT:    [[IN0:%.*]] = load i8, i8* [[IN:%.*]], align 4
-; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds i8, i8* [[IN]], i64 1
-; CHECK-NEXT:    [[IN1:%.*]] = load i8, i8* [[G1]], align 4
+; CHECK-NEXT:    [[IN0:%.*]] = load i8, ptr [[IN:%.*]], align 4
+; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds i8, ptr [[IN]], i64 1
+; CHECK-NEXT:    [[IN1:%.*]] = load i8, ptr [[G1]], align 4
 ; CHECK-NEXT:    [[V0:%.*]] = insertelement <2 x i8> [[AUX_VEC:%.*]], i8 [[IN0]], i8 2
 ; CHECK-NEXT:    [[V1:%.*]] = insertelement <2 x i8> [[V0]], i8 [[IN1]], i64 1
 ; CHECK-NEXT:    [[OUT:%.*]] = add <2 x i8> [[V1]], [[V1]]
 ; CHECK-NEXT:    ret <2 x i8> [[OUT]]
 ;
-  %in0 = load i8, i8* %in, align 4
-  %g1 = getelementptr inbounds i8, i8* %in, i64 1
-  %in1 = load i8, i8* %g1, align 4
+  %in0 = load i8, ptr %in, align 4
+  %g1 = getelementptr inbounds i8, ptr %in, i64 1
+  %in1 = load i8, ptr %g1, align 4
 
   %v0 = insertelement <2 x i8> %aux_vec, i8 %in0, i8 2
   %v1 = insertelement <2 x i8> %v0, i8 %in1, i64 1
@@ -45,19 +45,19 @@ define  <2 x i8> @exceed_index(<2 x i8> %aux_vec, i8* %in) {
   ret <2 x i8> %out
 }
 
-define  <2 x i8> @poison_index(<2 x i8> %aux_vec, i8* %in) {
+define  <2 x i8> @poison_index(<2 x i8> %aux_vec, ptr %in) {
 ; CHECK-LABEL: @poison_index(
-; CHECK-NEXT:    [[IN0:%.*]] = load i8, i8* [[IN:%.*]], align 4
-; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds i8, i8* [[IN]], i64 1
-; CHECK-NEXT:    [[IN1:%.*]] = load i8, i8* [[G1]], align 4
+; CHECK-NEXT:    [[IN0:%.*]] = load i8, ptr [[IN:%.*]], align 4
+; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds i8, ptr [[IN]], i64 1
+; CHECK-NEXT:    [[IN1:%.*]] = load i8, ptr [[G1]], align 4
 ; CHECK-NEXT:    [[V0:%.*]] = insertelement <2 x i8> [[AUX_VEC:%.*]], i8 [[IN0]], i8 poison
 ; CHECK-NEXT:    [[V1:%.*]] = insertelement <2 x i8> [[V0]], i8 [[IN1]], i64 1
 ; CHECK-NEXT:    [[OUT:%.*]] = add <2 x i8> [[V1]], [[V1]]
 ; CHECK-NEXT:    ret <2 x i8> [[OUT]]
 ;
-  %in0 = load i8, i8* %in, align 4
-  %g1 = getelementptr inbounds i8, i8* %in, i64 1
-  %in1 = load i8, i8* %g1, align 4
+  %in0 = load i8, ptr %in, align 4
+  %g1 = getelementptr inbounds i8, ptr %in, i64 1
+  %in1 = load i8, ptr %g1, align 4
 
   %v0 = insertelement <2 x i8> %aux_vec, i8 %in0, i8 poison
   %v1 = insertelement <2 x i8> %v0, i8 %in1, i64 1

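Aside: the insert-crash-index.ll changes above touch only the pointer
operands; the negative, out-of-range, and poison insertelement indices the
tests exercise are left intact, which is what keeps the conversion NFC. A side
effect visible throughout this diff is that all pointer values now share the
one type ptr, so the old i8*-versus-i32* distinction survives only as the
accessed type on each memory instruction. A small sketch of that point (the
function @mixed is hypothetical):

  define i32 @mixed(ptr %a, ptr %b) {
    ; %a and %b have the identical type ptr; the former i8* vs. i32*
    ; distinction is now expressed only by the type each load accesses.
    %c = load i8, ptr %a, align 1
    %w = load i32, ptr %b, align 4
    %z = zext i8 %c to i32
    %r = add i32 %w, %z
    ret i32 %r
  }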
diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-multiple-uses.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-multiple-uses.ll
index df304b5470ed8..1b684e9060756 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-multiple-uses.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-multiple-uses.ll
@@ -4,7 +4,7 @@
 define void @main() {
 ; CHECK-LABEL: @main(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* undef, align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr undef, align 16
 ; CHECK-NEXT:    [[VEC_0_VEC_EXTRACT_I:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0
 ; CHECK-NEXT:    [[ADD_I:%.*]] = add i64 [[VEC_0_VEC_EXTRACT_I]], 1
 ; CHECK-NEXT:    [[VEC_0_VEC_INSERT_I:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[ADD_I]], i32 0
@@ -16,7 +16,7 @@ define void @main() {
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load <2 x i64>, <2 x i64>* undef, align 16
+  %0 = load <2 x i64>, ptr undef, align 16
   %vec.0.vec.extract.i = extractelement <2 x i64> %0, i32 0
   %add.i = add i64 %vec.0.vec.extract.i, 1
   %vec.0.vec.insert.i = insertelement <2 x i64> %0, i64 %add.i, i32 0

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll
index d301ff8734cc0..99df7c7130ffa 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll
@@ -3,14 +3,12 @@
 
 %struct.sw = type { float, float, float, float }
 
-define { <2 x float>, <2 x float> } @foo(%struct.sw* %v) {
+define { <2 x float>, <2 x float> } @foo(ptr %v) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* undef, align 4
-; CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_SW:%.*]], %struct.sw* [[V:%.*]], i64 0, i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* undef, align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[X]] to <2 x float>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr undef, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr undef, align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, ptr [[V:%.*]], align 16
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> <float undef, float poison, float poison, float undef>, float [[TMP0]], i32 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP1]], i32 2
@@ -25,11 +23,10 @@ define { <2 x float>, <2 x float> } @foo(%struct.sw* %v) {
 ; CHECK-NEXT:    ret { <2 x float>, <2 x float> } [[INS2]]
 ;
 entry:
-  %0 = load float, float* undef, align 4
-  %x = getelementptr inbounds %struct.sw, %struct.sw* %v, i64 0, i32 0
-  %1 = load float, float* %x, align 16
-  %y = getelementptr inbounds %struct.sw, %struct.sw* %v, i64 0, i32 1
-  %2 = load float, float* %y, align 4
+  %0 = load float, ptr undef, align 4
+  %1 = load float, ptr %v, align 16
+  %y = getelementptr inbounds %struct.sw, ptr %v, i64 0, i32 1
+  %2 = load float, ptr %y, align 4
   %mul3 = fmul float %0, %2
   %add = fadd float undef, %mul3
   %add6 = fadd float %add, undef
@@ -38,7 +35,7 @@ entry:
   %add16 = fadd float %mul12, undef
   %add20 = fadd float undef, %add16
   %add24 = fadd float undef, %add20
-  %3 = load float, float* undef, align 4
+  %3 = load float, ptr undef, align 4
   %mul27 = fmul float %1, %3
   %add31 = fadd float %mul27, undef
   %add35 = fadd float undef, %add31

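Aside: the insert-shuffle.ll hunk above is one of the places in this diff
where the conversion does more than re-spell a type. The getelementptr to
field 0 of %struct.sw computed the same address as %v itself, so once pointee
types are gone the zero-offset GEP (and the bitcast that followed it in the
CHECK lines) can be dropped and the load goes straight through %v. A reduced
sketch of that simplification (the function @first_field is hypothetical):

  %struct.sw = type { float, float, float, float }

  define float @first_field(ptr %v) {
    ; Under typed pointers a GEP of the form
    ;   getelementptr inbounds %struct.sw, %struct.sw* %v, i64 0, i32 0
    ; was needed to produce a float*; it computes a zero offset, so with
    ; opaque pointers the load can use %v directly.
    %f = load float, ptr %v, align 4
    ret float %f
  }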
diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/insertelement-postpone.ll b/llvm/test/Transforms/SLPVectorizer/X86/insertelement-postpone.ll
index b5c7995c3ad7e..6756d9aa2101e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insertelement-postpone.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insertelement-postpone.ll
@@ -1,13 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -passes=slp-vectorizer -mtriple x86_64-unknown-linux-gnu < %s | FileCheck %s
 
-define <4 x double> @test(double* %p2, double %i1754, double %i1781, double %i1778) {
+define <4 x double> @test(ptr %p2, double %i1754, double %i1781, double %i1778) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[I1771:%.*]] = getelementptr inbounds double, double* [[P2:%.*]], i64 54
-; CHECK-NEXT:    [[I1772:%.*]] = load double, double* [[I1771]], align 8
-; CHECK-NEXT:    [[I1795:%.*]] = getelementptr inbounds double, double* [[P2]], i64 55
-; CHECK-NEXT:    [[I1796:%.*]] = load double, double* [[I1795]], align 8
+; CHECK-NEXT:    [[I1771:%.*]] = getelementptr inbounds double, ptr [[P2:%.*]], i64 54
+; CHECK-NEXT:    [[I1772:%.*]] = load double, ptr [[I1771]], align 8
+; CHECK-NEXT:    [[I1795:%.*]] = getelementptr inbounds double, ptr [[P2]], i64 55
+; CHECK-NEXT:    [[I1796:%.*]] = load double, ptr [[I1795]], align 8
 ; CHECK-NEXT:    [[I1797:%.*]] = fmul fast double [[I1796]], [[I1781:%.*]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x double> poison, double [[I1754:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double [[I1778:%.*]], i32 1
@@ -20,8 +20,8 @@ define <4 x double> @test(double* %p2, double %i1754, double %i1781, double %i17
 ; CHECK-NEXT:    ret <4 x double> [[TMP6]]
 ;
 entry:
-  %i1771 = getelementptr inbounds double, double* %p2, i64 54
-  %i1772 = load double, double* %i1771, align 8
+  %i1771 = getelementptr inbounds double, ptr %p2, i64 54
+  %i1772 = load double, ptr %i1771, align 8
   %i1773 = fmul fast double %i1772, %i1754
   %i1782 = fmul fast double %i1754, %i1754
   %i1783 = fadd fast double %i1782, 1.000000e+00
@@ -29,8 +29,8 @@ entry:
   %i1788 = fadd fast double %i1787, 1.000000e+00
   %i1792 = fmul fast double %i1754, %i1781
   %i1793 = fadd fast double %i1792, 1.000000e+00
-  %i1795 = getelementptr inbounds double, double* %p2, i64 55
-  %i1796 = load double, double* %i1795, align 8
+  %i1795 = getelementptr inbounds double, ptr %p2, i64 55
+  %i1796 = load double, ptr %i1795, align 8
   %i1797 = fmul fast double %i1796, %i1781
   %i1798 = fadd fast double %i1773, %i1797
   %i1976 = insertelement <4 x double> zeroinitializer, double %i1783, i64 0

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insertvalue.ll b/llvm/test/Transforms/SLPVectorizer/X86/insertvalue.ll
index 1c96e80715d0b..4c5815c0bd144 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insertvalue.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insertvalue.ll
@@ -1,63 +1,48 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s
 
-define void @julia_2xdouble([2 x double]* sret([2 x double]), [2 x double]*, [2 x double]*, [2 x double]*) {
+define void @julia_2xdouble(ptr sret([2 x double]), ptr, ptr, ptr) {
 ; CHECK-LABEL: @julia_2xdouble(
 ; CHECK-NEXT:  top:
-; CHECK-NEXT:    [[PX0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP2:%.*]], i64 0, i64 0
-; CHECK-NEXT:    [[PY0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP3:%.*]], i64 0, i64 0
-; CHECK-NEXT:    [[PZ0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP1:%.*]], i64 0, i64 0
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[PX0]] to <2 x double>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[PY0]] to <2 x double>*
-; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr [[TMP2:%.*]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x double>, ptr [[TMP3:%.*]], align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP5]], [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast double* [[PZ0]] to <2 x double>*
-; CHECK-NEXT:    [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = load <2 x double>, ptr [[TMP1:%.*]], align 4
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <2 x double> [[TMP8]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[TMP11]], i32 0
 ; CHECK-NEXT:    [[I0:%.*]] = insertvalue [2 x double] undef, double [[TMP12]], 0
 ; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x double> [[TMP11]], i32 1
 ; CHECK-NEXT:    [[I1:%.*]] = insertvalue [2 x double] [[I0]], double [[TMP13]], 1
-; CHECK-NEXT:    store [2 x double] [[I1]], [2 x double]* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    store [2 x double] [[I1]], ptr [[TMP0:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 top:
-  %px0 = getelementptr inbounds [2 x double], [2 x double]* %2, i64 0, i64 0
-  %x0 = load double, double* %px0, align 4
-  %py0 = getelementptr inbounds [2 x double], [2 x double]* %3, i64 0, i64 0
-  %y0 = load double, double* %py0, align 4
+  %x0 = load double, ptr %2, align 4
+  %y0 = load double, ptr %3, align 4
   %m0 = fmul double %x0, %y0
-  %px1 = getelementptr inbounds [2 x double], [2 x double]* %2, i64 0, i64 1
-  %x1 = load double, double* %px1, align 4
-  %py1 = getelementptr inbounds [2 x double], [2 x double]* %3, i64 0, i64 1
-  %y1 = load double, double* %py1, align 4
+  %px1 = getelementptr inbounds [2 x double], ptr %2, i64 0, i64 1
+  %x1 = load double, ptr %px1, align 4
+  %py1 = getelementptr inbounds [2 x double], ptr %3, i64 0, i64 1
+  %y1 = load double, ptr %py1, align 4
   %m1 = fmul double %x1, %y1
-  %pz0 = getelementptr inbounds [2 x double], [2 x double]* %1, i64 0, i64 0
-  %z0 = load double, double* %pz0, align 4
+  %z0 = load double, ptr %1, align 4
   %a0 = fadd double %m0, %z0
   %i0 = insertvalue [2 x double] undef, double %a0, 0
-  %pz1 = getelementptr inbounds [2 x double], [2 x double]* %1, i64 0, i64 1
-  %z1 = load double, double* %pz1, align 4
+  %pz1 = getelementptr inbounds [2 x double], ptr %1, i64 0, i64 1
+  %z1 = load double, ptr %pz1, align 4
   %a1 = fadd double %m1, %z1
   %i1 = insertvalue [2 x double] %i0, double %a1, 1
-  store [2 x double] %i1, [2 x double]* %0, align 4
+  store [2 x double] %i1, ptr %0, align 4
   ret void
 }
 
-define void @julia_4xfloat([4 x float]* sret([4 x float]), [4 x float]*, [4 x float]*, [4 x float]*) {
+define void @julia_4xfloat(ptr sret([4 x float]), ptr, ptr, ptr) {
 ; CHECK-LABEL: @julia_4xfloat(
 ; CHECK-NEXT:  top:
-; CHECK-NEXT:    [[PX0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2:%.*]], i64 0, i64 0
-; CHECK-NEXT:    [[PY0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3:%.*]], i64 0, i64 0
-; CHECK-NEXT:    [[PZ0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1:%.*]], i64 0, i64 0
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[PX0]] to <4 x float>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[PY0]] to <4 x float>*
-; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x float>, ptr [[TMP2:%.*]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x float>, ptr [[TMP3:%.*]], align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <4 x float> [[TMP5]], [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast float* [[PZ0]] to <4 x float>*
-; CHECK-NEXT:    [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = load <4 x float>, ptr [[TMP1:%.*]], align 4
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <4 x float> [[TMP8]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x float> [[TMP11]], i32 0
 ; CHECK-NEXT:    [[I0:%.*]] = insertvalue [4 x float] undef, float [[TMP12]], 0
@@ -67,57 +52,52 @@ define void @julia_4xfloat([4 x float]* sret([4 x float]), [4 x float]*, [4 x fl
 ; CHECK-NEXT:    [[I2:%.*]] = insertvalue [4 x float] [[I1]], float [[TMP14]], 2
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x float> [[TMP11]], i32 3
 ; CHECK-NEXT:    [[I3:%.*]] = insertvalue [4 x float] [[I2]], float [[TMP15]], 3
-; CHECK-NEXT:    store [4 x float] [[I3]], [4 x float]* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    store [4 x float] [[I3]], ptr [[TMP0:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 top:
-  %px0 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0
-  %x0 = load float, float* %px0, align 4
-  %py0 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0
-  %y0 = load float, float* %py0, align 4
+  %x0 = load float, ptr %2, align 4
+  %y0 = load float, ptr %3, align 4
   %m0 = fmul float %x0, %y0
-  %px1 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 1
-  %x1 = load float, float* %px1, align 4
-  %py1 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 1
-  %y1 = load float, float* %py1, align 4
+  %px1 = getelementptr inbounds [4 x float], ptr %2, i64 0, i64 1
+  %x1 = load float, ptr %px1, align 4
+  %py1 = getelementptr inbounds [4 x float], ptr %3, i64 0, i64 1
+  %y1 = load float, ptr %py1, align 4
   %m1 = fmul float %x1, %y1
-  %px2 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 2
-  %x2 = load float, float* %px2, align 4
-  %py2 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 2
-  %y2 = load float, float* %py2, align 4
+  %px2 = getelementptr inbounds [4 x float], ptr %2, i64 0, i64 2
+  %x2 = load float, ptr %px2, align 4
+  %py2 = getelementptr inbounds [4 x float], ptr %3, i64 0, i64 2
+  %y2 = load float, ptr %py2, align 4
   %m2 = fmul float %x2, %y2
-  %px3 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 3
-  %x3 = load float, float* %px3, align 4
-  %py3 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 3
-  %y3 = load float, float* %py3, align 4
+  %px3 = getelementptr inbounds [4 x float], ptr %2, i64 0, i64 3
+  %x3 = load float, ptr %px3, align 4
+  %py3 = getelementptr inbounds [4 x float], ptr %3, i64 0, i64 3
+  %y3 = load float, ptr %py3, align 4
   %m3 = fmul float %x3, %y3
-  %pz0 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0
-  %z0 = load float, float* %pz0, align 4
+  %z0 = load float, ptr %1, align 4
   %a0 = fadd float %m0, %z0
   %i0 = insertvalue [4 x float] undef, float %a0, 0
-  %pz1 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 1
-  %z1 = load float, float* %pz1, align 4
+  %pz1 = getelementptr inbounds [4 x float], ptr %1, i64 0, i64 1
+  %z1 = load float, ptr %pz1, align 4
   %a1 = fadd float %m1, %z1
   %i1 = insertvalue [4 x float] %i0, float %a1, 1
-  %pz2 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 2
-  %z2 = load float, float* %pz2, align 4
+  %pz2 = getelementptr inbounds [4 x float], ptr %1, i64 0, i64 2
+  %z2 = load float, ptr %pz2, align 4
   %a2 = fadd float %m2, %z2
   %i2 = insertvalue [4 x float] %i1, float %a2, 2
-  %pz3 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 3
-  %z3 = load float, float* %pz3, align 4
+  %pz3 = getelementptr inbounds [4 x float], ptr %1, i64 0, i64 3
+  %z3 = load float, ptr %pz3, align 4
   %a3 = fadd float %m3, %z3
   %i3 = insertvalue [4 x float] %i2, float %a3, 3
-  store [4 x float] %i3, [4 x float]* %0, align 4
+  store [4 x float] %i3, ptr %0, align 4
   ret void
 }
 
-define void @julia_load_array_of_float([4 x float]* %a, [4 x float]* %b, [4 x float]* %c) {
+define void @julia_load_array_of_float(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @julia_load_array_of_float(
 ; CHECK-NEXT:  top:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast [4 x float]* [[A:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [4 x float]* [[B:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
 ; CHECK-NEXT:    [[C_ARR0:%.*]] = insertvalue [4 x float] undef, float [[TMP5]], 0
@@ -127,15 +107,15 @@ define void @julia_load_array_of_float([4 x float]* %a, [4 x float]* %b, [4 x fl
 ; CHECK-NEXT:    [[C_ARR2:%.*]] = insertvalue [4 x float] [[C_ARR1]], float [[TMP7]], 2
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
 ; CHECK-NEXT:    [[C_ARR3:%.*]] = insertvalue [4 x float] [[C_ARR2]], float [[TMP8]], 3
-; CHECK-NEXT:    store [4 x float] [[C_ARR3]], [4 x float]* [[C:%.*]], align 4
+; CHECK-NEXT:    store [4 x float] [[C_ARR3]], ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 top:
-  %a_arr = load [4 x float], [4 x float]* %a, align 4
+  %a_arr = load [4 x float], ptr %a, align 4
   %a0 = extractvalue [4 x float] %a_arr, 0
   %a2 = extractvalue [4 x float] %a_arr, 2
   %a1 = extractvalue [4 x float] %a_arr, 1
-  %b_arr = load [4 x float], [4 x float]* %b, align 4
+  %b_arr = load [4 x float], ptr %b, align 4
   %b0 = extractvalue [4 x float] %b_arr, 0
   %b2 = extractvalue [4 x float] %b_arr, 2
   %b1 = extractvalue [4 x float] %b_arr, 1
@@ -149,17 +129,15 @@ top:
   %c3 = fsub float %a3, %b3
   %c_arr2 = insertvalue [4 x float] %c_arr1, float %c2, 2
   %c_arr3 = insertvalue [4 x float] %c_arr2, float %c3, 3
-  store [4 x float] %c_arr3, [4 x float]* %c, align 4
+  store [4 x float] %c_arr3, ptr %c, align 4
   ret void
 }
 
-define void @julia_load_array_of_i32([4 x i32]* %a, [4 x i32]* %b, [4 x i32]* %c) {
+define void @julia_load_array_of_i32(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @julia_load_array_of_i32(
 ; CHECK-NEXT:  top:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast [4 x i32]* [[A:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [4 x i32]* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
 ; CHECK-NEXT:    [[C_ARR0:%.*]] = insertvalue [4 x i32] undef, i32 [[TMP5]], 0
@@ -169,15 +147,15 @@ define void @julia_load_array_of_i32([4 x i32]* %a, [4 x i32]* %b, [4 x i32]* %c
 ; CHECK-NEXT:    [[C_ARR2:%.*]] = insertvalue [4 x i32] [[C_ARR1]], i32 [[TMP7]], 2
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
 ; CHECK-NEXT:    [[C_ARR3:%.*]] = insertvalue [4 x i32] [[C_ARR2]], i32 [[TMP8]], 3
-; CHECK-NEXT:    store [4 x i32] [[C_ARR3]], [4 x i32]* [[C:%.*]], align 4
+; CHECK-NEXT:    store [4 x i32] [[C_ARR3]], ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 top:
-  %a_arr = load [4 x i32], [4 x i32]* %a, align 4
+  %a_arr = load [4 x i32], ptr %a, align 4
   %a0 = extractvalue [4 x i32] %a_arr, 0
   %a2 = extractvalue [4 x i32] %a_arr, 2
   %a1 = extractvalue [4 x i32] %a_arr, 1
-  %b_arr = load [4 x i32], [4 x i32]* %b, align 4
+  %b_arr = load [4 x i32], ptr %b, align 4
   %b0 = extractvalue [4 x i32] %b_arr, 0
   %b2 = extractvalue [4 x i32] %b_arr, 2
   %b1 = extractvalue [4 x i32] %b_arr, 1
@@ -191,20 +169,20 @@ top:
   %c3 = sub i32 %a3, %b3
   %c_arr2 = insertvalue [4 x i32] %c_arr1, i32 %c2, 2
   %c_arr3 = insertvalue [4 x i32] %c_arr2, i32 %c3, 3
-  store [4 x i32] %c_arr3, [4 x i32]* %c, align 4
+  store [4 x i32] %c_arr3, ptr %c, align 4
   ret void
 }
 
 ; Almost identical to previous test, but for type that should NOT be vectorized.
 ;
-define void @julia_load_array_of_i16([4 x i16]* %a, [4 x i16]* %b, [4 x i16]* %c) {
+define void @julia_load_array_of_i16(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @julia_load_array_of_i16(
 ; CHECK-NEXT:  top:
-; CHECK-NEXT:    [[A_ARR:%.*]] = load [4 x i16], [4 x i16]* [[A:%.*]], align 4
+; CHECK-NEXT:    [[A_ARR:%.*]] = load [4 x i16], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[A0:%.*]] = extractvalue [4 x i16] [[A_ARR]], 0
 ; CHECK-NEXT:    [[A2:%.*]] = extractvalue [4 x i16] [[A_ARR]], 2
 ; CHECK-NEXT:    [[A1:%.*]] = extractvalue [4 x i16] [[A_ARR]], 1
-; CHECK-NEXT:    [[B_ARR:%.*]] = load [4 x i16], [4 x i16]* [[B:%.*]], align 4
+; CHECK-NEXT:    [[B_ARR:%.*]] = load [4 x i16], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[B0:%.*]] = extractvalue [4 x i16] [[B_ARR]], 0
 ; CHECK-NEXT:    [[B2:%.*]] = extractvalue [4 x i16] [[B_ARR]], 2
 ; CHECK-NEXT:    [[B1:%.*]] = extractvalue [4 x i16] [[B_ARR]], 1
@@ -218,15 +196,15 @@ define void @julia_load_array_of_i16([4 x i16]* %a, [4 x i16]* %b, [4 x i16]* %c
 ; CHECK-NEXT:    [[C3:%.*]] = sub i16 [[A3]], [[B3]]
 ; CHECK-NEXT:    [[C_ARR2:%.*]] = insertvalue [4 x i16] [[C_ARR1]], i16 [[C2]], 2
 ; CHECK-NEXT:    [[C_ARR3:%.*]] = insertvalue [4 x i16] [[C_ARR2]], i16 [[C3]], 3
-; CHECK-NEXT:    store [4 x i16] [[C_ARR3]], [4 x i16]* [[C:%.*]], align 4
+; CHECK-NEXT:    store [4 x i16] [[C_ARR3]], ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 top:
-  %a_arr = load [4 x i16], [4 x i16]* %a, align 4
+  %a_arr = load [4 x i16], ptr %a, align 4
   %a0 = extractvalue [4 x i16] %a_arr, 0
   %a2 = extractvalue [4 x i16] %a_arr, 2
   %a1 = extractvalue [4 x i16] %a_arr, 1
-  %b_arr = load [4 x i16], [4 x i16]* %b, align 4
+  %b_arr = load [4 x i16], ptr %b, align 4
   %b0 = extractvalue [4 x i16] %b_arr, 0
   %b2 = extractvalue [4 x i16] %b_arr, 2
   %b1 = extractvalue [4 x i16] %b_arr, 1
@@ -240,19 +218,17 @@ top:
   %c3 = sub i16 %a3, %b3
   %c_arr2 = insertvalue [4 x i16] %c_arr1, i16 %c2, 2
   %c_arr3 = insertvalue [4 x i16] %c_arr2, i16 %c3, 3
-  store [4 x i16] %c_arr3, [4 x i16]* %c, align 4
+  store [4 x i16] %c_arr3, ptr %c, align 4
   ret void
 }
 
 %pseudovec = type { float, float, float, float }
 
-define void @julia_load_struct_of_float(%pseudovec* %a, %pseudovec* %b, %pseudovec* %c) {
+define void @julia_load_struct_of_float(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @julia_load_struct_of_float(
 ; CHECK-NEXT:  top:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast %pseudovec* [[A:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast %pseudovec* [[B:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
 ; CHECK-NEXT:    [[C_STRUCT0:%.*]] = insertvalue [[PSEUDOVEC:%.*]] undef, float [[TMP5]], 0
@@ -262,14 +238,14 @@ define void @julia_load_struct_of_float(%pseudovec* %a, %pseudovec* %b, %pseudov
 ; CHECK-NEXT:    [[C_STRUCT2:%.*]] = insertvalue [[PSEUDOVEC]] [[C_STRUCT1]], float [[TMP7]], 2
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
 ; CHECK-NEXT:    [[C_STRUCT3:%.*]] = insertvalue [[PSEUDOVEC]] [[C_STRUCT2]], float [[TMP8]], 3
-; CHECK-NEXT:    store [[PSEUDOVEC]] [[C_STRUCT3]], %pseudovec* [[C:%.*]], align 4
+; CHECK-NEXT:    store [[PSEUDOVEC]] [[C_STRUCT3]], ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 top:
-  %a_struct = load %pseudovec, %pseudovec* %a, align 4
+  %a_struct = load %pseudovec, ptr %a, align 4
   %a0 = extractvalue %pseudovec %a_struct, 0
   %a1 = extractvalue %pseudovec %a_struct, 1
-  %b_struct = load %pseudovec, %pseudovec* %b, align 4
+  %b_struct = load %pseudovec, ptr %b, align 4
   %a2 = extractvalue %pseudovec %a_struct, 2
   %b0 = extractvalue %pseudovec %b_struct, 0
   %a3 = extractvalue %pseudovec %a_struct, 3
@@ -284,6 +260,6 @@ top:
   %c2 = fsub float %a2, %b2
   %c_struct2 = insertvalue %pseudovec %c_struct1, float %c2, 2
   %c_struct3 = insertvalue %pseudovec %c_struct2, float %c3, 3
-  store %pseudovec %c_struct3, %pseudovec* %c, align 4
+  store %pseudovec %c_struct3, ptr %c, align 4
   ret void
 }

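The CHECK-line churn in insertvalue.ll is the third pattern: with typed pointers the SLP vectorizer had to emit a bitcast to a vector pointer type before every wide access, while with opaque pointers the wide load or store uses the scalar base pointer as-is, so those bitcasts disappear from the expected output. A minimal sketch with hypothetical values:

  ; typed pointers
  %vp = bitcast double* %a to <2 x double>*
  %wide = load <2 x double>, <2 x double>* %vp, align 4
  ; opaque pointers
  %wide = load <2 x double>, ptr %a, align 4
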
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll
index f1f460735d1d1..6c4572593027d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll
@@ -1,12 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -slp-max-reg-size=128 | FileCheck %s
 
-define void @inst_size(i64* %a, <2 x i64> %b) {
+define void @inst_size(ptr %a, <2 x i64> %b) {
 ; CHECK-LABEL: @inst_size(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[VAL:%.*]] = extractelement <2 x i64> [[B:%.*]], i32 0
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[A:%.*]] to <4 x i64>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[T41:%.*]] = icmp sgt i64 0, [[VAL]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i64> zeroinitializer, [[TMP1]]
 ; CHECK-NEXT:    br label [[BLOCK:%.*]]
@@ -17,13 +16,13 @@ define void @inst_size(i64* %a, <2 x i64> %b) {
 ;
 entry:
   %val = extractelement <2 x i64> %b, i32 0
-  %tmpl1 = load i64, i64* %a, align 4
-  %ptr2 = getelementptr inbounds i64, i64* %a, i64 1
-  %tmpl2 = load i64, i64* %ptr2, align 4
-  %ptr3 = getelementptr inbounds i64, i64* %a, i64 2
-  %tmpl3 = load i64, i64* %ptr3, align 4
-  %ptr4 = getelementptr inbounds i64, i64* %a, i64 3
-  %tmpl4 = load i64, i64* %ptr4, align 4
+  %tmpl1 = load i64, ptr %a, align 4
+  %ptr2 = getelementptr inbounds i64, ptr %a, i64 1
+  %tmpl2 = load i64, ptr %ptr2, align 4
+  %ptr3 = getelementptr inbounds i64, ptr %a, i64 2
+  %tmpl3 = load i64, ptr %ptr3, align 4
+  %ptr4 = getelementptr inbounds i64, ptr %a, i64 3
+  %tmpl4 = load i64, ptr %ptr4, align 4
   %t41 = icmp sgt i64 0, %val
   %t42 = icmp sgt i64 0, %tmpl1
   %t43 = icmp sgt i64 0, %tmpl2

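One detail worth noting in inst_size_bug.ll: getelementptr keeps its source element type even under opaque pointers, because that type is what scales the index into a byte offset; only the pointer operand's type becomes ptr. A minimal sketch with hypothetical names:

  ; typed pointers
  %ptr2 = getelementptr inbounds i64, i64* %a, i64 1
  ; opaque pointers
  %ptr2 = getelementptr inbounds i64, ptr %a, i64 1
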
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/intrinsic.ll b/llvm/test/Transforms/SLPVectorizer/X86/intrinsic.ll
index bdba775134c9e..0eaf89da36601 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/intrinsic.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/intrinsic.ll
@@ -6,257 +6,245 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 declare double @llvm.fabs.f64(double) nounwind readnone
 
-define void @vec_fabs_f64(double* %a, double* %b, double* %c) {
+define void @vec_fabs_f64(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @vec_fabs_f64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[C:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[C:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %i0 = load double, double* %a, align 8
-  %i1 = load double, double* %b, align 8
+  %i0 = load double, ptr %a, align 8
+  %i1 = load double, ptr %b, align 8
   %mul = fmul double %i0, %i1
   %call = tail call double @llvm.fabs.f64(double %mul) nounwind readnone
-  %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double, double* %arrayidx3, align 8
-  %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double, double* %arrayidx4, align 8
+  %arrayidx3 = getelementptr inbounds double, ptr %a, i64 1
+  %i3 = load double, ptr %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double, ptr %b, i64 1
+  %i4 = load double, ptr %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   %call5 = tail call double @llvm.fabs.f64(double %mul5) nounwind readnone
-  store double %call, double* %c, align 8
-  %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
-  store double %call5, double* %arrayidx5, align 8
+  store double %call, ptr %c, align 8
+  %arrayidx5 = getelementptr inbounds double, ptr %c, i64 1
+  store double %call5, ptr %arrayidx5, align 8
   ret void
 }
 
 declare float @llvm.copysign.f32(float, float) nounwind readnone
 
-define void @vec_copysign_f32(float* %a, float* %b, float* noalias %c) {
+define void @vec_copysign_f32(ptr %a, ptr %b, ptr noalias %c) {
 ; CHECK-LABEL: @vec_copysign_f32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[A:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[TMP1]], <4 x float> [[TMP3]])
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast float* [[C:%.*]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP4]], ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load float, float* %a, align 4
-  %1 = load float, float* %b, align 4
+  %0 = load float, ptr %a, align 4
+  %1 = load float, ptr %b, align 4
   %call0 = tail call float @llvm.copysign.f32(float %0, float %1) nounwind readnone
-  store float %call0, float* %c, align 4
+  store float %call0, ptr %c, align 4
 
-  %ix2 = getelementptr inbounds float, float* %a, i64 1
-  %2 = load float, float* %ix2, align 4
-  %ix3 = getelementptr inbounds float, float* %b, i64 1
-  %3 = load float, float* %ix3, align 4
+  %ix2 = getelementptr inbounds float, ptr %a, i64 1
+  %2 = load float, ptr %ix2, align 4
+  %ix3 = getelementptr inbounds float, ptr %b, i64 1
+  %3 = load float, ptr %ix3, align 4
   %call1 = tail call float @llvm.copysign.f32(float %2, float %3) nounwind readnone
-  %c1 = getelementptr inbounds float, float* %c, i64 1
-  store float %call1, float* %c1, align 4
+  %c1 = getelementptr inbounds float, ptr %c, i64 1
+  store float %call1, ptr %c1, align 4
 
-  %ix4 = getelementptr inbounds float, float* %a, i64 2
-  %4 = load float, float* %ix4, align 4
-  %ix5 = getelementptr inbounds float, float* %b, i64 2
-  %5 = load float, float* %ix5, align 4
+  %ix4 = getelementptr inbounds float, ptr %a, i64 2
+  %4 = load float, ptr %ix4, align 4
+  %ix5 = getelementptr inbounds float, ptr %b, i64 2
+  %5 = load float, ptr %ix5, align 4
   %call2 = tail call float @llvm.copysign.f32(float %4, float %5) nounwind readnone
-  %c2 = getelementptr inbounds float, float* %c, i64 2
-  store float %call2, float* %c2, align 4
+  %c2 = getelementptr inbounds float, ptr %c, i64 2
+  store float %call2, ptr %c2, align 4
 
-  %ix6 = getelementptr inbounds float, float* %a, i64 3
-  %6 = load float, float* %ix6, align 4
-  %ix7 = getelementptr inbounds float, float* %b, i64 3
-  %7 = load float, float* %ix7, align 4
+  %ix6 = getelementptr inbounds float, ptr %a, i64 3
+  %6 = load float, ptr %ix6, align 4
+  %ix7 = getelementptr inbounds float, ptr %b, i64 3
+  %7 = load float, ptr %ix7, align 4
   %call3 = tail call float @llvm.copysign.f32(float %6, float %7) nounwind readnone
-  %c3 = getelementptr inbounds float, float* %c, i64 3
-  store float %call3, float* %c3, align 4
+  %c3 = getelementptr inbounds float, ptr %c, i64 3
+  store float %call3, ptr %c3, align 4
 
   ret void
 }
 
 declare i32 @llvm.bswap.i32(i32) nounwind readnone
 
-define void @vec_bswap_i32(i32* %a, i32* %b, i32* %c) {
+define void @vec_bswap_i32(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @vec_bswap_i32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[C:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %i0 = load i32, i32* %a, align 4
-  %i1 = load i32, i32* %b, align 4
+  %i0 = load i32, ptr %a, align 4
+  %i1 = load i32, ptr %b, align 4
   %add1 = add i32 %i0, %i1
   %call1 = tail call i32 @llvm.bswap.i32(i32 %add1) nounwind readnone
 
-  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1
-  %i2 = load i32, i32* %arrayidx2, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1
-  %i3 = load i32, i32* %arrayidx3, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i32 1
+  %i2 = load i32, ptr %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %b, i32 1
+  %i3 = load i32, ptr %arrayidx3, align 4
   %add2 = add i32 %i2, %i3
   %call2 = tail call i32 @llvm.bswap.i32(i32 %add2) nounwind readnone
 
-  %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2
-  %i4 = load i32, i32* %arrayidx4, align 4
-  %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2
-  %i5 = load i32, i32* %arrayidx5, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %a, i32 2
+  %i4 = load i32, ptr %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %b, i32 2
+  %i5 = load i32, ptr %arrayidx5, align 4
   %add3 = add i32 %i4, %i5
   %call3 = tail call i32 @llvm.bswap.i32(i32 %add3) nounwind readnone
 
-  %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3
-  %i6 = load i32, i32* %arrayidx6, align 4
-  %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3
-  %i7 = load i32, i32* %arrayidx7, align 4
+  %arrayidx6 = getelementptr inbounds i32, ptr %a, i32 3
+  %i6 = load i32, ptr %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds i32, ptr %b, i32 3
+  %i7 = load i32, ptr %arrayidx7, align 4
   %add4 = add i32 %i6, %i7
   %call4 = tail call i32 @llvm.bswap.i32(i32 %add4) nounwind readnone
 
-  store i32 %call1, i32* %c, align 4
-  %arrayidx8 = getelementptr inbounds i32, i32* %c, i32 1
-  store i32 %call2, i32* %arrayidx8, align 4
-  %arrayidx9 = getelementptr inbounds i32, i32* %c, i32 2
-  store i32 %call3, i32* %arrayidx9, align 4
-  %arrayidx10 = getelementptr inbounds i32, i32* %c, i32 3
-  store i32 %call4, i32* %arrayidx10, align 4
+  store i32 %call1, ptr %c, align 4
+  %arrayidx8 = getelementptr inbounds i32, ptr %c, i32 1
+  store i32 %call2, ptr %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %c, i32 2
+  store i32 %call3, ptr %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds i32, ptr %c, i32 3
+  store i32 %call4, ptr %arrayidx10, align 4
   ret void
 
 }
 
 declare i32 @llvm.ctlz.i32(i32,i1) nounwind readnone
 
-define void @vec_ctlz_i32(i32* %a, i32* %b, i32* %c, i1) {
+define void @vec_ctlz_i32(ptr %a, ptr %b, ptr %c, i1) {
 ; CHECK-LABEL: @vec_ctlz_i32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP5]], i1 true)
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[C:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %i0 = load i32, i32* %a, align 4
-  %i1 = load i32, i32* %b, align 4
+  %i0 = load i32, ptr %a, align 4
+  %i1 = load i32, ptr %b, align 4
   %add1 = add i32 %i0, %i1
   %call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone
 
-  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1
-  %i2 = load i32, i32* %arrayidx2, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1
-  %i3 = load i32, i32* %arrayidx3, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i32 1
+  %i2 = load i32, ptr %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %b, i32 1
+  %i3 = load i32, ptr %arrayidx3, align 4
   %add2 = add i32 %i2, %i3
   %call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 true) nounwind readnone
 
-  %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2
-  %i4 = load i32, i32* %arrayidx4, align 4
-  %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2
-  %i5 = load i32, i32* %arrayidx5, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %a, i32 2
+  %i4 = load i32, ptr %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %b, i32 2
+  %i5 = load i32, ptr %arrayidx5, align 4
   %add3 = add i32 %i4, %i5
   %call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone
 
-  %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3
-  %i6 = load i32, i32* %arrayidx6, align 4
-  %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3
-  %i7 = load i32, i32* %arrayidx7, align 4
+  %arrayidx6 = getelementptr inbounds i32, ptr %a, i32 3
+  %i6 = load i32, ptr %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds i32, ptr %b, i32 3
+  %i7 = load i32, ptr %arrayidx7, align 4
   %add4 = add i32 %i6, %i7
   %call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 true) nounwind readnone
 
-  store i32 %call1, i32* %c, align 4
-  %arrayidx8 = getelementptr inbounds i32, i32* %c, i32 1
-  store i32 %call2, i32* %arrayidx8, align 4
-  %arrayidx9 = getelementptr inbounds i32, i32* %c, i32 2
-  store i32 %call3, i32* %arrayidx9, align 4
-  %arrayidx10 = getelementptr inbounds i32, i32* %c, i32 3
-  store i32 %call4, i32* %arrayidx10, align 4
+  store i32 %call1, ptr %c, align 4
+  %arrayidx8 = getelementptr inbounds i32, ptr %c, i32 1
+  store i32 %call2, ptr %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %c, i32 2
+  store i32 %call3, ptr %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds i32, ptr %c, i32 3
+  store i32 %call4, ptr %arrayidx10, align 4
   ret void
 
 }
 
-define void @vec_ctlz_i32_neg(i32* %a, i32* %b, i32* %c, i1) {
+define void @vec_ctlz_i32_neg(ptr %a, ptr %b, ptr %c, i1) {
 ; CHECK-LABEL: @vec_ctlz_i32_neg(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[I0:%.*]] = load i32, i32* [[A:%.*]], align 4
-; CHECK-NEXT:    [[I1:%.*]] = load i32, i32* [[B:%.*]], align 4
+; CHECK-NEXT:    [[I0:%.*]] = load i32, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[I1:%.*]] = load i32, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[I0]], [[I1]]
 ; CHECK-NEXT:    [[CALL1:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[ADD1]], i1 true) #[[ATTR3:[0-9]+]]
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 1
-; CHECK-NEXT:    [[I2:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 1
-; CHECK-NEXT:    [[I3:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 1
+; CHECK-NEXT:    [[I2:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 1
+; CHECK-NEXT:    [[I3:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
 ; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[I2]], [[I3]]
 ; CHECK-NEXT:    [[CALL2:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[ADD2]], i1 false) #[[ATTR3]]
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 2
-; CHECK-NEXT:    [[I4:%.*]] = load i32, i32* [[ARRAYIDX4]], align 4
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 2
-; CHECK-NEXT:    [[I5:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 2
+; CHECK-NEXT:    [[I4:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 2
+; CHECK-NEXT:    [[I5:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    [[ADD3:%.*]] = add i32 [[I4]], [[I5]]
 ; CHECK-NEXT:    [[CALL3:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[ADD3]], i1 true) #[[ATTR3]]
-; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 3
-; CHECK-NEXT:    [[I6:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
-; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 3
-; CHECK-NEXT:    [[I7:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 3
+; CHECK-NEXT:    [[I6:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 3
+; CHECK-NEXT:    [[I7:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
 ; CHECK-NEXT:    [[ADD4:%.*]] = add i32 [[I6]], [[I7]]
 ; CHECK-NEXT:    [[CALL4:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[ADD4]], i1 false) #[[ATTR3]]
-; CHECK-NEXT:    store i32 [[CALL1]], i32* [[C:%.*]], align 4
-; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 1
-; CHECK-NEXT:    store i32 [[CALL2]], i32* [[ARRAYIDX8]], align 4
-; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 2
-; CHECK-NEXT:    store i32 [[CALL3]], i32* [[ARRAYIDX9]], align 4
-; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 3
-; CHECK-NEXT:    store i32 [[CALL4]], i32* [[ARRAYIDX10]], align 4
+; CHECK-NEXT:    store i32 [[CALL1]], ptr [[C:%.*]], align 4
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 1
+; CHECK-NEXT:    store i32 [[CALL2]], ptr [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 2
+; CHECK-NEXT:    store i32 [[CALL3]], ptr [[ARRAYIDX9]], align 4
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 3
+; CHECK-NEXT:    store i32 [[CALL4]], ptr [[ARRAYIDX10]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %i0 = load i32, i32* %a, align 4
-  %i1 = load i32, i32* %b, align 4
+  %i0 = load i32, ptr %a, align 4
+  %i1 = load i32, ptr %b, align 4
   %add1 = add i32 %i0, %i1
   %call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone
 
-  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1
-  %i2 = load i32, i32* %arrayidx2, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1
-  %i3 = load i32, i32* %arrayidx3, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i32 1
+  %i2 = load i32, ptr %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %b, i32 1
+  %i3 = load i32, ptr %arrayidx3, align 4
   %add2 = add i32 %i2, %i3
   %call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 false) nounwind readnone
 
-  %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2
-  %i4 = load i32, i32* %arrayidx4, align 4
-  %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2
-  %i5 = load i32, i32* %arrayidx5, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %a, i32 2
+  %i4 = load i32, ptr %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %b, i32 2
+  %i5 = load i32, ptr %arrayidx5, align 4
   %add3 = add i32 %i4, %i5
   %call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone
 
-  %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3
-  %i6 = load i32, i32* %arrayidx6, align 4
-  %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3
-  %i7 = load i32, i32* %arrayidx7, align 4
+  %arrayidx6 = getelementptr inbounds i32, ptr %a, i32 3
+  %i6 = load i32, ptr %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds i32, ptr %b, i32 3
+  %i7 = load i32, ptr %arrayidx7, align 4
   %add4 = add i32 %i6, %i7
   %call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 false) nounwind readnone
 
-  store i32 %call1, i32* %c, align 4
-  %arrayidx8 = getelementptr inbounds i32, i32* %c, i32 1
-  store i32 %call2, i32* %arrayidx8, align 4
-  %arrayidx9 = getelementptr inbounds i32, i32* %c, i32 2
-  store i32 %call3, i32* %arrayidx9, align 4
-  %arrayidx10 = getelementptr inbounds i32, i32* %c, i32 3
-  store i32 %call4, i32* %arrayidx10, align 4
+  store i32 %call1, ptr %c, align 4
+  %arrayidx8 = getelementptr inbounds i32, ptr %c, i32 1
+  store i32 %call2, ptr %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %c, i32 2
+  store i32 %call3, ptr %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds i32, ptr %c, i32 3
+  store i32 %call4, ptr %arrayidx10, align 4
   ret void
 
 
@@ -265,251 +253,245 @@ entry:
 
 declare i32 @llvm.cttz.i32(i32,i1) nounwind readnone
 
-define void @vec_cttz_i32(i32* %a, i32* %b, i32* %c, i1) {
+define void @vec_cttz_i32(ptr %a, ptr %b, ptr %c, i1) {
 ; CHECK-LABEL: @vec_cttz_i32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP5]], i1 true)
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[C:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %i0 = load i32, i32* %a, align 4
-  %i1 = load i32, i32* %b, align 4
+  %i0 = load i32, ptr %a, align 4
+  %i1 = load i32, ptr %b, align 4
   %add1 = add i32 %i0, %i1
   %call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone
 
-  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1
-  %i2 = load i32, i32* %arrayidx2, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1
-  %i3 = load i32, i32* %arrayidx3, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i32 1
+  %i2 = load i32, ptr %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %b, i32 1
+  %i3 = load i32, ptr %arrayidx3, align 4
   %add2 = add i32 %i2, %i3
   %call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 true) nounwind readnone
 
-  %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2
-  %i4 = load i32, i32* %arrayidx4, align 4
-  %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2
-  %i5 = load i32, i32* %arrayidx5, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %a, i32 2
+  %i4 = load i32, ptr %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %b, i32 2
+  %i5 = load i32, ptr %arrayidx5, align 4
   %add3 = add i32 %i4, %i5
   %call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone
 
-  %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3
-  %i6 = load i32, i32* %arrayidx6, align 4
-  %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3
-  %i7 = load i32, i32* %arrayidx7, align 4
+  %arrayidx6 = getelementptr inbounds i32, ptr %a, i32 3
+  %i6 = load i32, ptr %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds i32, ptr %b, i32 3
+  %i7 = load i32, ptr %arrayidx7, align 4
   %add4 = add i32 %i6, %i7
   %call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 true) nounwind readnone
 
-  store i32 %call1, i32* %c, align 4
-  %arrayidx8 = getelementptr inbounds i32, i32* %c, i32 1
-  store i32 %call2, i32* %arrayidx8, align 4
-  %arrayidx9 = getelementptr inbounds i32, i32* %c, i32 2
-  store i32 %call3, i32* %arrayidx9, align 4
-  %arrayidx10 = getelementptr inbounds i32, i32* %c, i32 3
-  store i32 %call4, i32* %arrayidx10, align 4
+  store i32 %call1, ptr %c, align 4
+  %arrayidx8 = getelementptr inbounds i32, ptr %c, i32 1
+  store i32 %call2, ptr %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %c, i32 2
+  store i32 %call3, ptr %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds i32, ptr %c, i32 3
+  store i32 %call4, ptr %arrayidx10, align 4
   ret void
 
 }
 
-define void @vec_cttz_i32_neg(i32* %a, i32* %b, i32* %c, i1) {
+define void @vec_cttz_i32_neg(ptr %a, ptr %b, ptr %c, i1) {
 ; CHECK-LABEL: @vec_cttz_i32_neg(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[I0:%.*]] = load i32, i32* [[A:%.*]], align 4
-; CHECK-NEXT:    [[I1:%.*]] = load i32, i32* [[B:%.*]], align 4
+; CHECK-NEXT:    [[I0:%.*]] = load i32, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[I1:%.*]] = load i32, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[I0]], [[I1]]
 ; CHECK-NEXT:    [[CALL1:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[ADD1]], i1 true) #[[ATTR3]]
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 1
-; CHECK-NEXT:    [[I2:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 1
-; CHECK-NEXT:    [[I3:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 1
+; CHECK-NEXT:    [[I2:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 1
+; CHECK-NEXT:    [[I3:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
 ; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[I2]], [[I3]]
 ; CHECK-NEXT:    [[CALL2:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[ADD2]], i1 false) #[[ATTR3]]
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 2
-; CHECK-NEXT:    [[I4:%.*]] = load i32, i32* [[ARRAYIDX4]], align 4
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 2
-; CHECK-NEXT:    [[I5:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 2
+; CHECK-NEXT:    [[I4:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 2
+; CHECK-NEXT:    [[I5:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    [[ADD3:%.*]] = add i32 [[I4]], [[I5]]
 ; CHECK-NEXT:    [[CALL3:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[ADD3]], i1 true) #[[ATTR3]]
-; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 3
-; CHECK-NEXT:    [[I6:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
-; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 3
-; CHECK-NEXT:    [[I7:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 3
+; CHECK-NEXT:    [[I6:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 3
+; CHECK-NEXT:    [[I7:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
 ; CHECK-NEXT:    [[ADD4:%.*]] = add i32 [[I6]], [[I7]]
 ; CHECK-NEXT:    [[CALL4:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[ADD4]], i1 false) #[[ATTR3]]
-; CHECK-NEXT:    store i32 [[CALL1]], i32* [[C:%.*]], align 4
-; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 1
-; CHECK-NEXT:    store i32 [[CALL2]], i32* [[ARRAYIDX8]], align 4
-; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 2
-; CHECK-NEXT:    store i32 [[CALL3]], i32* [[ARRAYIDX9]], align 4
-; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 3
-; CHECK-NEXT:    store i32 [[CALL4]], i32* [[ARRAYIDX10]], align 4
+; CHECK-NEXT:    store i32 [[CALL1]], ptr [[C:%.*]], align 4
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 1
+; CHECK-NEXT:    store i32 [[CALL2]], ptr [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 2
+; CHECK-NEXT:    store i32 [[CALL3]], ptr [[ARRAYIDX9]], align 4
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 3
+; CHECK-NEXT:    store i32 [[CALL4]], ptr [[ARRAYIDX10]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %i0 = load i32, i32* %a, align 4
-  %i1 = load i32, i32* %b, align 4
+  %i0 = load i32, ptr %a, align 4
+  %i1 = load i32, ptr %b, align 4
   %add1 = add i32 %i0, %i1
   %call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone
 
-  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1
-  %i2 = load i32, i32* %arrayidx2, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1
-  %i3 = load i32, i32* %arrayidx3, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i32 1
+  %i2 = load i32, ptr %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %b, i32 1
+  %i3 = load i32, ptr %arrayidx3, align 4
   %add2 = add i32 %i2, %i3
   %call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 false) nounwind readnone
 
-  %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2
-  %i4 = load i32, i32* %arrayidx4, align 4
-  %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2
-  %i5 = load i32, i32* %arrayidx5, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %a, i32 2
+  %i4 = load i32, ptr %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %b, i32 2
+  %i5 = load i32, ptr %arrayidx5, align 4
   %add3 = add i32 %i4, %i5
   %call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone
 
-  %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3
-  %i6 = load i32, i32* %arrayidx6, align 4
-  %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3
-  %i7 = load i32, i32* %arrayidx7, align 4
+  %arrayidx6 = getelementptr inbounds i32, ptr %a, i32 3
+  %i6 = load i32, ptr %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds i32, ptr %b, i32 3
+  %i7 = load i32, ptr %arrayidx7, align 4
   %add4 = add i32 %i6, %i7
   %call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 false) nounwind readnone
 
-  store i32 %call1, i32* %c, align 4
-  %arrayidx8 = getelementptr inbounds i32, i32* %c, i32 1
-  store i32 %call2, i32* %arrayidx8, align 4
-  %arrayidx9 = getelementptr inbounds i32, i32* %c, i32 2
-  store i32 %call3, i32* %arrayidx9, align 4
-  %arrayidx10 = getelementptr inbounds i32, i32* %c, i32 3
-  store i32 %call4, i32* %arrayidx10, align 4
+  store i32 %call1, ptr %c, align 4
+  %arrayidx8 = getelementptr inbounds i32, ptr %c, i32 1
+  store i32 %call2, ptr %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %c, i32 2
+  store i32 %call3, ptr %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds i32, ptr %c, i32 3
+  store i32 %call4, ptr %arrayidx10, align 4
   ret void
 
 }
 
 
 declare float @llvm.powi.f32.i32(float, i32)
-define void @vec_powi_f32(float* %a, float* %b, float* %c, i32 %P) {
+define void @vec_powi_f32(ptr %a, ptr %b, ptr %c, i32 %P) {
 ; CHECK-LABEL: @vec_powi_f32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[A:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <4 x float> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[TMP4]], i32 [[P:%.*]])
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[C:%.*]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP5]], <4 x float>* [[TMP6]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP5]], ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %i0 = load float, float* %a, align 4
-  %i1 = load float, float* %b, align 4
+  %i0 = load float, ptr %a, align 4
+  %i1 = load float, ptr %b, align 4
   %add1 = fadd float %i0, %i1
   %call1 = tail call float @llvm.powi.f32.i32(float %add1,i32 %P) nounwind readnone
 
-  %arrayidx2 = getelementptr inbounds float, float* %a, i32 1
-  %i2 = load float, float* %arrayidx2, align 4
-  %arrayidx3 = getelementptr inbounds float, float* %b, i32 1
-  %i3 = load float, float* %arrayidx3, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %a, i32 1
+  %i2 = load float, ptr %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds float, ptr %b, i32 1
+  %i3 = load float, ptr %arrayidx3, align 4
   %add2 = fadd float %i2, %i3
   %call2 = tail call float @llvm.powi.f32.i32(float %add2,i32 %P) nounwind readnone
 
-  %arrayidx4 = getelementptr inbounds float, float* %a, i32 2
-  %i4 = load float, float* %arrayidx4, align 4
-  %arrayidx5 = getelementptr inbounds float, float* %b, i32 2
-  %i5 = load float, float* %arrayidx5, align 4
+  %arrayidx4 = getelementptr inbounds float, ptr %a, i32 2
+  %i4 = load float, ptr %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %b, i32 2
+  %i5 = load float, ptr %arrayidx5, align 4
   %add3 = fadd float %i4, %i5
   %call3 = tail call float @llvm.powi.f32.i32(float %add3,i32 %P) nounwind readnone
 
-  %arrayidx6 = getelementptr inbounds float, float* %a, i32 3
-  %i6 = load float, float* %arrayidx6, align 4
-  %arrayidx7 = getelementptr inbounds float, float* %b, i32 3
-  %i7 = load float, float* %arrayidx7, align 4
+  %arrayidx6 = getelementptr inbounds float, ptr %a, i32 3
+  %i6 = load float, ptr %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds float, ptr %b, i32 3
+  %i7 = load float, ptr %arrayidx7, align 4
   %add4 = fadd float %i6, %i7
   %call4 = tail call float @llvm.powi.f32.i32(float %add4,i32 %P) nounwind readnone
 
-  store float %call1, float* %c, align 4
-  %arrayidx8 = getelementptr inbounds float, float* %c, i32 1
-  store float %call2, float* %arrayidx8, align 4
-  %arrayidx9 = getelementptr inbounds float, float* %c, i32 2
-  store float %call3, float* %arrayidx9, align 4
-  %arrayidx10 = getelementptr inbounds float, float* %c, i32 3
-  store float %call4, float* %arrayidx10, align 4
+  store float %call1, ptr %c, align 4
+  %arrayidx8 = getelementptr inbounds float, ptr %c, i32 1
+  store float %call2, ptr %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds float, ptr %c, i32 2
+  store float %call3, ptr %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds float, ptr %c, i32 3
+  store float %call4, ptr %arrayidx10, align 4
   ret void
 
 }
 
 
-define void @vec_powi_f32_neg(float* %a, float* %b, float* %c, i32 %P, i32 %Q) {
+define void @vec_powi_f32_neg(ptr %a, ptr %b, ptr %c, i32 %P, i32 %Q) {
 ; CHECK-LABEL: @vec_powi_f32_neg(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[I0:%.*]] = load float, float* [[A:%.*]], align 4
-; CHECK-NEXT:    [[I1:%.*]] = load float, float* [[B:%.*]], align 4
+; CHECK-NEXT:    [[I0:%.*]] = load float, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[I1:%.*]] = load float, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[ADD1:%.*]] = fadd float [[I0]], [[I1]]
 ; CHECK-NEXT:    [[CALL1:%.*]] = tail call float @llvm.powi.f32.i32(float [[ADD1]], i32 [[P:%.*]]) #[[ATTR3]]
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A]], i32 1
-; CHECK-NEXT:    [[I2:%.*]] = load float, float* [[ARRAYIDX2]], align 4
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[B]], i32 1
-; CHECK-NEXT:    [[I3:%.*]] = load float, float* [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 1
+; CHECK-NEXT:    [[I2:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[B]], i32 1
+; CHECK-NEXT:    [[I3:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
 ; CHECK-NEXT:    [[ADD2:%.*]] = fadd float [[I2]], [[I3]]
 ; CHECK-NEXT:    [[CALL2:%.*]] = tail call float @llvm.powi.f32.i32(float [[ADD2]], i32 [[Q:%.*]]) #[[ATTR3]]
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[A]], i32 2
-; CHECK-NEXT:    [[I4:%.*]] = load float, float* [[ARRAYIDX4]], align 4
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[B]], i32 2
-; CHECK-NEXT:    [[I5:%.*]] = load float, float* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
+; CHECK-NEXT:    [[I4:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[B]], i32 2
+; CHECK-NEXT:    [[I5:%.*]] = load float, ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    [[ADD3:%.*]] = fadd float [[I4]], [[I5]]
 ; CHECK-NEXT:    [[CALL3:%.*]] = tail call float @llvm.powi.f32.i32(float [[ADD3]], i32 [[P]]) #[[ATTR3]]
-; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i32 3
-; CHECK-NEXT:    [[I6:%.*]] = load float, float* [[ARRAYIDX6]], align 4
-; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[B]], i32 3
-; CHECK-NEXT:    [[I7:%.*]] = load float, float* [[ARRAYIDX7]], align 4
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[A]], i32 3
+; CHECK-NEXT:    [[I6:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[B]], i32 3
+; CHECK-NEXT:    [[I7:%.*]] = load float, ptr [[ARRAYIDX7]], align 4
 ; CHECK-NEXT:    [[ADD4:%.*]] = fadd float [[I6]], [[I7]]
 ; CHECK-NEXT:    [[CALL4:%.*]] = tail call float @llvm.powi.f32.i32(float [[ADD4]], i32 [[Q]]) #[[ATTR3]]
-; CHECK-NEXT:    store float [[CALL1]], float* [[C:%.*]], align 4
-; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[C]], i32 1
-; CHECK-NEXT:    store float [[CALL2]], float* [[ARRAYIDX8]], align 4
-; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[C]], i32 2
-; CHECK-NEXT:    store float [[CALL3]], float* [[ARRAYIDX9]], align 4
-; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[C]], i32 3
-; CHECK-NEXT:    store float [[CALL4]], float* [[ARRAYIDX10]], align 4
+; CHECK-NEXT:    store float [[CALL1]], ptr [[C:%.*]], align 4
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[C]], i32 1
+; CHECK-NEXT:    store float [[CALL2]], ptr [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[C]], i32 2
+; CHECK-NEXT:    store float [[CALL3]], ptr [[ARRAYIDX9]], align 4
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[C]], i32 3
+; CHECK-NEXT:    store float [[CALL4]], ptr [[ARRAYIDX10]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %i0 = load float, float* %a, align 4
-  %i1 = load float, float* %b, align 4
+  %i0 = load float, ptr %a, align 4
+  %i1 = load float, ptr %b, align 4
   %add1 = fadd float %i0, %i1
   %call1 = tail call float @llvm.powi.f32.i32(float %add1,i32 %P) nounwind readnone
 
-  %arrayidx2 = getelementptr inbounds float, float* %a, i32 1
-  %i2 = load float, float* %arrayidx2, align 4
-  %arrayidx3 = getelementptr inbounds float, float* %b, i32 1
-  %i3 = load float, float* %arrayidx3, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %a, i32 1
+  %i2 = load float, ptr %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds float, ptr %b, i32 1
+  %i3 = load float, ptr %arrayidx3, align 4
   %add2 = fadd float %i2, %i3
   %call2 = tail call float @llvm.powi.f32.i32(float %add2,i32 %Q) nounwind readnone
 
-  %arrayidx4 = getelementptr inbounds float, float* %a, i32 2
-  %i4 = load float, float* %arrayidx4, align 4
-  %arrayidx5 = getelementptr inbounds float, float* %b, i32 2
-  %i5 = load float, float* %arrayidx5, align 4
+  %arrayidx4 = getelementptr inbounds float, ptr %a, i32 2
+  %i4 = load float, ptr %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %b, i32 2
+  %i5 = load float, ptr %arrayidx5, align 4
   %add3 = fadd float %i4, %i5
   %call3 = tail call float @llvm.powi.f32.i32(float %add3,i32 %P) nounwind readnone
 
-  %arrayidx6 = getelementptr inbounds float, float* %a, i32 3
-  %i6 = load float, float* %arrayidx6, align 4
-  %arrayidx7 = getelementptr inbounds float, float* %b, i32 3
-  %i7 = load float, float* %arrayidx7, align 4
+  %arrayidx6 = getelementptr inbounds float, ptr %a, i32 3
+  %i6 = load float, ptr %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds float, ptr %b, i32 3
+  %i7 = load float, ptr %arrayidx7, align 4
   %add4 = fadd float %i6, %i7
   %call4 = tail call float @llvm.powi.f32.i32(float %add4,i32 %Q) nounwind readnone
 
-  store float %call1, float* %c, align 4
-  %arrayidx8 = getelementptr inbounds float, float* %c, i32 1
-  store float %call2, float* %arrayidx8, align 4
-  %arrayidx9 = getelementptr inbounds float, float* %c, i32 2
-  store float %call3, float* %arrayidx9, align 4
-  %arrayidx10 = getelementptr inbounds float, float* %c, i32 3
-  store float %call4, float* %arrayidx10, align 4
+  store float %call1, ptr %c, align 4
+  %arrayidx8 = getelementptr inbounds float, ptr %c, i32 1
+  store float %call2, ptr %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds float, ptr %c, i32 2
+  store float %call3, ptr %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds float, ptr %c, i32 3
+  store float %call4, ptr %arrayidx10, align 4
   ret void
 
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/intrinsic_with_scalar_param.ll b/llvm/test/Transforms/SLPVectorizer/X86/intrinsic_with_scalar_param.ll
index eac961f2b1a5d..f87a713faf5f5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/intrinsic_with_scalar_param.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/intrinsic_with_scalar_param.ll
@@ -2,38 +2,36 @@
 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -slp-threshold=-8 | FileCheck %s
 
 declare float @llvm.powi.f32.i32(float, i32)
-define void @vec_powi_f32(float* %a, float* %c, i32 %P) {
+define void @vec_powi_f32(ptr %a, ptr %c, i32 %P) {
 ; CHECK-LABEL: @vec_powi_f32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[A:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[TMP1]], i32 [[P:%.*]])
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[C:%.*]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %i0 = load float, float* %a, align 4
+  %i0 = load float, ptr %a, align 4
   %call1 = tail call float @llvm.powi.f32.i32(float %i0,i32 %P)
 
-  %arrayidx2 = getelementptr inbounds float, float* %a, i32 1
-  %i2 = load float, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %a, i32 1
+  %i2 = load float, ptr %arrayidx2, align 4
   %call2 = tail call float @llvm.powi.f32.i32(float %i2,i32 %P)
 
-  %arrayidx4 = getelementptr inbounds float, float* %a, i32 2
-  %i4 = load float, float* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds float, ptr %a, i32 2
+  %i4 = load float, ptr %arrayidx4, align 4
   %call3 = tail call float @llvm.powi.f32.i32(float %i4,i32 %P)
 
-  %arrayidx6 = getelementptr inbounds float, float* %a, i32 3
-  %i6 = load float, float* %arrayidx6, align 4
+  %arrayidx6 = getelementptr inbounds float, ptr %a, i32 3
+  %i6 = load float, ptr %arrayidx6, align 4
   %call4 = tail call float @llvm.powi.f32.i32(float %i6,i32 %P)
 
-  store float %call1, float* %c, align 4
-  %arrayidx8 = getelementptr inbounds float, float* %c, i32 1
-  store float %call2, float* %arrayidx8, align 4
-  %arrayidx9 = getelementptr inbounds float, float* %c, i32 2
-  store float %call3, float* %arrayidx9, align 4
-  %arrayidx10 = getelementptr inbounds float, float* %c, i32 3
-  store float %call4, float* %arrayidx10, align 4
+  store float %call1, ptr %c, align 4
+  %arrayidx8 = getelementptr inbounds float, ptr %c, i32 1
+  store float %call2, ptr %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds float, ptr %c, i32 2
+  store float %call3, ptr %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds float, ptr %c, i32 3
+  store float %call4, ptr %arrayidx10, align 4
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
index b626a2f84cd3d..a616fd5f7c197 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
@@ -7,31 +7,31 @@
 define i32 @fn1() {
 ; CHECK-LABEL: @fn1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @b to <4 x i32>*), align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr @b, align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[TMP0]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> <i32 8, i32 poison, i32 ptrtoint (i32 ()* @fn1 to i32), i32 poison>, <4 x i32> [[TMP0]], <4 x i32> <i32 0, i32 5, i32 2, i32 undef>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> <i32 8, i32 poison, i32 ptrtoint (ptr @fn1 to i32), i32 poison>, <4 x i32> [[TMP0]], <4 x i32> <i32 0, i32 5, i32 2, i32 undef>
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[SHUFFLE]], <4 x i32> <i32 0, i32 6, i32 0, i32 0>
 ; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
-; CHECK-NEXT:    store <4 x i32> [[SHUFFLE1]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4
+; CHECK-NEXT:    store <4 x i32> [[SHUFFLE1]], ptr @a, align 4
 ; CHECK-NEXT:    ret i32 0
 ;
   entry:
-  %0 = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @b, i64 0, i32 0), align 4
+  %0 = load i32, ptr @b, align 4
   %cmp = icmp sgt i32 %0, 0
   %cond = select i1 %cmp, i32 8, i32 0
-  store i32 %cond, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i32 3), align 4
-  %1 = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @b, i64 0, i32 1), align 4
+  store i32 %cond, ptr getelementptr inbounds ([4 x i32], ptr @a, i64 0, i32 3), align 4
+  %1 = load i32, ptr getelementptr ([4 x i32], ptr @b, i64 0, i32 1), align 4
   %cmp1 = icmp sgt i32 %1, 0
   %. = select i1 %cmp1, i32 %1, i32 6
-  store i32 %., i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i32 0), align 4
-  %2 = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @b, i64 0, i32 2), align 4
+  store i32 %., ptr @a, align 4
+  %2 = load i32, ptr getelementptr ([4 x i32], ptr @b, i64 0, i32 2), align 4
   %cmp4 = icmp sgt i32 %2, 0
-  %3 = select i1 %cmp4, i32 ptrtoint (i32 ()* @fn1 to i32), i32 0
-  store i32 %3, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i32 1), align 4
-  %4 = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @b, i64 0, i32 3), align 4
+  %3 = select i1 %cmp4, i32 ptrtoint (ptr @fn1 to i32), i32 0
+  store i32 %3, ptr getelementptr inbounds ([4 x i32], ptr @a, i64 0, i32 1), align 4
+  %4 = load i32, ptr getelementptr ([4 x i32], ptr @b, i64 0, i32 3), align 4
   %cmp6 = icmp sgt i32 %4, 0
-  %5 = select i1 %cmp6, i32 ptrtoint (i32 ()* @fn1 to i32), i32 0
-  store i32 %5, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i32 2), align 4
+  %5 = select i1 %cmp6, i32 ptrtoint (ptr @fn1 to i32), i32 0
+  store i32 %5, ptr getelementptr inbounds ([4 x i32], ptr @a, i64 0, i32 2), align 4
   ret i32 0
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-shuffle-placement.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-shuffle-placement.ll
index dc96a8a0eb604..e18ac7f4e2f62 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-shuffle-placement.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-shuffle-placement.ll
@@ -2,10 +2,10 @@
 ; RUN: opt < %s -S -mtriple=x86_64-unknown -mattr=+avx -passes=slp-vectorizer | FileCheck %s
 
 ;void jumble (int * restrict A, int * restrict B) {
  ;  int tmp0 = A[10]*A[0];
  ;  int tmp1 = A[11]*A[1];
  ;  int tmp2 = A[12]*A[3];
  ;  int tmp3 = A[13]*A[2];
   ;  B[0] = tmp0;
   ;  B[1] = tmp1;
   ;  B[2] = tmp2;
@@ -13,94 +13,88 @@
   ;}
 
 ; Function Attrs: norecurse nounwind uwtable
-define void @jumble1(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
+define void @jumble1(ptr noalias nocapture readonly %A, ptr noalias nocapture %B) {
 ; CHECK-LABEL: @jumble1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 10
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[A]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 10
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[A]], align 4
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP1]], [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 10
-  %0 = load i32, i32* %arrayidx, align 4
-  %1 = load i32, i32* %A, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 10
+  %0 = load i32, ptr %arrayidx, align 4
+  %1 = load i32, ptr %A, align 4
   %mul = mul nsw i32 %0, %1
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 11
-  %2 = load i32, i32* %arrayidx2, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 1
-  %3 = load i32, i32* %arrayidx3, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 11
+  %2 = load i32, ptr %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 1
+  %3 = load i32, ptr %arrayidx3, align 4
   %mul4 = mul nsw i32 %2, %3
-  %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 12
-  %4 = load i32, i32* %arrayidx5, align 4
-  %arrayidx6 = getelementptr inbounds i32, i32* %A, i64 3
-  %5 = load i32, i32* %arrayidx6, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %A, i64 12
+  %4 = load i32, ptr %arrayidx5, align 4
+  %arrayidx6 = getelementptr inbounds i32, ptr %A, i64 3
+  %5 = load i32, ptr %arrayidx6, align 4
   %mul7 = mul nsw i32 %4, %5
-  %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 13
-  %6 = load i32, i32* %arrayidx8, align 4
-  %arrayidx9 = getelementptr inbounds i32, i32* %A, i64 2
-  %7 = load i32, i32* %arrayidx9, align 4
+  %arrayidx8 = getelementptr inbounds i32, ptr %A, i64 13
+  %6 = load i32, ptr %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %A, i64 2
+  %7 = load i32, ptr %arrayidx9, align 4
   %mul10 = mul nsw i32 %6, %7
-  store i32 %mul, i32* %B, align 4
-  %arrayidx12 = getelementptr inbounds i32, i32* %B, i64 1
-  store i32 %mul4, i32* %arrayidx12, align 4
-  %arrayidx13 = getelementptr inbounds i32, i32* %B, i64 2
-  store i32 %mul7, i32* %arrayidx13, align 4
-  %arrayidx14 = getelementptr inbounds i32, i32* %B, i64 3
-  store i32 %mul10, i32* %arrayidx14, align 4
+  store i32 %mul, ptr %B, align 4
+  %arrayidx12 = getelementptr inbounds i32, ptr %B, i64 1
+  store i32 %mul4, ptr %arrayidx12, align 4
+  %arrayidx13 = getelementptr inbounds i32, ptr %B, i64 2
+  store i32 %mul7, ptr %arrayidx13, align 4
+  %arrayidx14 = getelementptr inbounds i32, ptr %B, i64 3
+  store i32 %mul10, ptr %arrayidx14, align 4
   ret void
 }
 
 ;Reversing the operands of MUL
 
 ; Function Attrs: norecurse nounwind uwtable
-define void @jumble2(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
+define void @jumble2(ptr noalias nocapture readonly %A, ptr noalias nocapture %B) {
 ; CHECK-LABEL: @jumble2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 10
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[A]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 10
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[A]], align 4
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 10
-  %0 = load i32, i32* %arrayidx, align 4
-  %1 = load i32, i32* %A, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 10
+  %0 = load i32, ptr %arrayidx, align 4
+  %1 = load i32, ptr %A, align 4
   %mul = mul nsw i32 %1, %0
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 11
-  %2 = load i32, i32* %arrayidx2, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 1
-  %3 = load i32, i32* %arrayidx3, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 11
+  %2 = load i32, ptr %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 1
+  %3 = load i32, ptr %arrayidx3, align 4
   %mul4 = mul nsw i32 %3, %2
-  %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 12
-  %4 = load i32, i32* %arrayidx5, align 4
-  %arrayidx6 = getelementptr inbounds i32, i32* %A, i64 3
-  %5 = load i32, i32* %arrayidx6, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %A, i64 12
+  %4 = load i32, ptr %arrayidx5, align 4
+  %arrayidx6 = getelementptr inbounds i32, ptr %A, i64 3
+  %5 = load i32, ptr %arrayidx6, align 4
   %mul7 = mul nsw i32 %5, %4
-  %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 13
-  %6 = load i32, i32* %arrayidx8, align 4
-  %arrayidx9 = getelementptr inbounds i32, i32* %A, i64 2
-  %7 = load i32, i32* %arrayidx9, align 4
+  %arrayidx8 = getelementptr inbounds i32, ptr %A, i64 13
+  %6 = load i32, ptr %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %A, i64 2
+  %7 = load i32, ptr %arrayidx9, align 4
   %mul10 = mul nsw i32 %7, %6
-  store i32 %mul, i32* %B, align 4
-  %arrayidx12 = getelementptr inbounds i32, i32* %B, i64 1
-  store i32 %mul4, i32* %arrayidx12, align 4
-  %arrayidx13 = getelementptr inbounds i32, i32* %B, i64 2
-  store i32 %mul7, i32* %arrayidx13, align 4
-  %arrayidx14 = getelementptr inbounds i32, i32* %B, i64 3
-  store i32 %mul10, i32* %arrayidx14, align 4
+  store i32 %mul, ptr %B, align 4
+  %arrayidx12 = getelementptr inbounds i32, ptr %B, i64 1
+  store i32 %mul4, ptr %arrayidx12, align 4
+  %arrayidx13 = getelementptr inbounds i32, ptr %B, i64 2
+  store i32 %mul7, ptr %arrayidx13, align 4
+  %arrayidx14 = getelementptr inbounds i32, ptr %B, i64 3
+  store i32 %mul10, ptr %arrayidx14, align 4
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll
index 633a088adb538..23f0afbf17750 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll
@@ -34,54 +34,49 @@
 
 
 ; Function Attrs: norecurse nounwind uwtable
-define void @phiUsingLoads(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) local_unnamed_addr #0 {
+define void @phiUsingLoads(ptr noalias nocapture readonly %A, ptr noalias nocapture %B) local_unnamed_addr #0 {
 ; CHECK-LABEL: @phiUsingLoads(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[TMP0]], 0
-; CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 25
-; CHECK-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 50
-; CHECK-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 75
+; CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 25
+; CHECK-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 50
+; CHECK-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 75
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP14:%.*]], <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP14:%.*]], ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = phi <4 x i32> [ undef, [[ENTRY]] ], [ [[TMP14]], [[FOR_INC]] ]
 ; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[ARRAYIDX2]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       if.else:
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX12]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4
 ; CHECK-NEXT:    [[CMP13:%.*]] = icmp eq i32 [[TMP5]], 0
 ; CHECK-NEXT:    br i1 [[CMP13]], label [[IF_THEN14:%.*]], label [[IF_ELSE27:%.*]]
 ; CHECK:       if.then14:
-; CHECK-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[ARRAYIDX17]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX17]], align 4
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       if.else27:
-; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX28]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX28]], align 4
 ; CHECK-NEXT:    [[CMP29:%.*]] = icmp eq i32 [[TMP8]], 0
 ; CHECK-NEXT:    br i1 [[CMP29]], label [[IF_THEN30:%.*]], label [[IF_ELSE43:%.*]]
 ; CHECK:       if.then30:
-; CHECK-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32* [[ARRAYIDX33]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4
+; CHECK-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr [[ARRAYIDX33]], align 4
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       if.else43:
-; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX44]], align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX44]], align 4
 ; CHECK-NEXT:    [[CMP45:%.*]] = icmp eq i32 [[TMP11]], 0
 ; CHECK-NEXT:    br i1 [[CMP45]], label [[IF_THEN46:%.*]], label [[FOR_INC]]
 ; CHECK:       if.then46:
-; CHECK-NEXT:    [[ARRAYIDX49:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i32* [[ARRAYIDX49]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP12]], align 4
+; CHECK-NEXT:    [[ARRAYIDX49:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load <4 x i32>, ptr [[ARRAYIDX49]], align 4
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
@@ -91,21 +86,21 @@ define void @phiUsingLoads(i32* noalias nocapture readonly %A, i32* noalias noca
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
 ;
 entry:
-  %0 = load i32, i32* %A, align 4
+  %0 = load i32, ptr %A, align 4
   %cmp1 = icmp eq i32 %0, 0
-  %arrayidx12 = getelementptr inbounds i32, i32* %A, i64 25
-  %arrayidx28 = getelementptr inbounds i32, i32* %A, i64 50
-  %arrayidx44 = getelementptr inbounds i32, i32* %A, i64 75
+  %arrayidx12 = getelementptr inbounds i32, ptr %A, i64 25
+  %arrayidx28 = getelementptr inbounds i32, ptr %A, i64 50
+  %arrayidx44 = getelementptr inbounds i32, ptr %A, i64 75
   br label %for.body
 
 for.cond.cleanup:                                 ; preds = %for.inc
-  store i32 %tmp0.1, i32* %B, align 4
-  %arrayidx64 = getelementptr inbounds i32, i32* %B, i64 1
-  store i32 %tmp1.1, i32* %arrayidx64, align 4
-  %arrayidx65 = getelementptr inbounds i32, i32* %B, i64 2
-  store i32 %tmp2.1, i32* %arrayidx65, align 4
-  %arrayidx66 = getelementptr inbounds i32, i32* %B, i64 3
-  store i32 %tmp3.1, i32* %arrayidx66, align 4
+  store i32 %tmp0.1, ptr %B, align 4
+  %arrayidx64 = getelementptr inbounds i32, ptr %B, i64 1
+  store i32 %tmp1.1, ptr %arrayidx64, align 4
+  %arrayidx65 = getelementptr inbounds i32, ptr %B, i64 2
+  store i32 %tmp2.1, ptr %arrayidx65, align 4
+  %arrayidx66 = getelementptr inbounds i32, ptr %B, i64 3
+  store i32 %tmp3.1, ptr %arrayidx66, align 4
   ret void
 
 for.body:                                         ; preds = %for.inc, %entry
@@ -117,74 +112,74 @@ for.body:                                         ; preds = %for.inc, %entry
   br i1 %cmp1, label %if.then, label %if.else
 
 if.then:                                          ; preds = %for.body
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx2, align 4
   %2 = add nuw nsw i64 %indvars.iv, 1
-  %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %2
-  %3 = load i32, i32* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr %A, i64 %2
+  %3 = load i32, ptr %arrayidx5, align 4
   %4 = add nuw nsw i64 %indvars.iv, 2
-  %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 %4
-  %5 = load i32, i32* %arrayidx8, align 4
+  %arrayidx8 = getelementptr inbounds i32, ptr %A, i64 %4
+  %5 = load i32, ptr %arrayidx8, align 4
   %6 = add nuw nsw i64 %indvars.iv, 3
-  %arrayidx11 = getelementptr inbounds i32, i32* %A, i64 %6
-  %7 = load i32, i32* %arrayidx11, align 4
+  %arrayidx11 = getelementptr inbounds i32, ptr %A, i64 %6
+  %7 = load i32, ptr %arrayidx11, align 4
   br label %for.inc
 
 if.else:                                          ; preds = %for.body
-  %8 = load i32, i32* %arrayidx12, align 4
+  %8 = load i32, ptr %arrayidx12, align 4
   %cmp13 = icmp eq i32 %8, 0
   br i1 %cmp13, label %if.then14, label %if.else27
 
 if.then14:                                        ; preds = %if.else
-  %arrayidx17 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %9 = load i32, i32* %arrayidx17, align 4
+  %arrayidx17 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %9 = load i32, ptr %arrayidx17, align 4
   %10 = add nuw nsw i64 %indvars.iv, 1
-  %arrayidx20 = getelementptr inbounds i32, i32* %A, i64 %10
-  %11 = load i32, i32* %arrayidx20, align 4
+  %arrayidx20 = getelementptr inbounds i32, ptr %A, i64 %10
+  %11 = load i32, ptr %arrayidx20, align 4
   %12 = add nuw nsw i64 %indvars.iv, 2
-  %arrayidx23 = getelementptr inbounds i32, i32* %A, i64 %12
-  %13 = load i32, i32* %arrayidx23, align 4
+  %arrayidx23 = getelementptr inbounds i32, ptr %A, i64 %12
+  %13 = load i32, ptr %arrayidx23, align 4
   %14 = add nuw nsw i64 %indvars.iv, 3
-  %arrayidx26 = getelementptr inbounds i32, i32* %A, i64 %14
-  %15 = load i32, i32* %arrayidx26, align 4
+  %arrayidx26 = getelementptr inbounds i32, ptr %A, i64 %14
+  %15 = load i32, ptr %arrayidx26, align 4
   br label %for.inc
 
 if.else27:                                        ; preds = %if.else
-  %16 = load i32, i32* %arrayidx28, align 4
+  %16 = load i32, ptr %arrayidx28, align 4
   %cmp29 = icmp eq i32 %16, 0
   br i1 %cmp29, label %if.then30, label %if.else43
 
 if.then30:                                        ; preds = %if.else27
-  %arrayidx33 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %17 = load i32, i32* %arrayidx33, align 4
+  %arrayidx33 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %17 = load i32, ptr %arrayidx33, align 4
   %18 = add nuw nsw i64 %indvars.iv, 1
-  %arrayidx36 = getelementptr inbounds i32, i32* %A, i64 %18
-  %19 = load i32, i32* %arrayidx36, align 4
+  %arrayidx36 = getelementptr inbounds i32, ptr %A, i64 %18
+  %19 = load i32, ptr %arrayidx36, align 4
   %20 = add nuw nsw i64 %indvars.iv, 2
-  %arrayidx39 = getelementptr inbounds i32, i32* %A, i64 %20
-  %21 = load i32, i32* %arrayidx39, align 4
+  %arrayidx39 = getelementptr inbounds i32, ptr %A, i64 %20
+  %21 = load i32, ptr %arrayidx39, align 4
   %22 = add nuw nsw i64 %indvars.iv, 3
-  %arrayidx42 = getelementptr inbounds i32, i32* %A, i64 %22
-  %23 = load i32, i32* %arrayidx42, align 4
+  %arrayidx42 = getelementptr inbounds i32, ptr %A, i64 %22
+  %23 = load i32, ptr %arrayidx42, align 4
   br label %for.inc
 
 if.else43:                                        ; preds = %if.else27
-  %24 = load i32, i32* %arrayidx44, align 4
+  %24 = load i32, ptr %arrayidx44, align 4
   %cmp45 = icmp eq i32 %24, 0
   br i1 %cmp45, label %if.then46, label %for.inc
 
 if.then46:                                        ; preds = %if.else43
-  %arrayidx49 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %25 = load i32, i32* %arrayidx49, align 4
+  %arrayidx49 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %25 = load i32, ptr %arrayidx49, align 4
   %26 = add nuw nsw i64 %indvars.iv, 1
-  %arrayidx52 = getelementptr inbounds i32, i32* %A, i64 %26
-  %27 = load i32, i32* %arrayidx52, align 4
+  %arrayidx52 = getelementptr inbounds i32, ptr %A, i64 %26
+  %27 = load i32, ptr %arrayidx52, align 4
   %28 = add nuw nsw i64 %indvars.iv, 3
-  %arrayidx55 = getelementptr inbounds i32, i32* %A, i64 %28
-  %29 = load i32, i32* %arrayidx55, align 4
+  %arrayidx55 = getelementptr inbounds i32, ptr %A, i64 %28
+  %29 = load i32, ptr %arrayidx55, align 4
   %30 = add nuw nsw i64 %indvars.iv, 2
-  %arrayidx58 = getelementptr inbounds i32, i32* %A, i64 %30
-  %31 = load i32, i32* %arrayidx58, align 4
+  %arrayidx58 = getelementptr inbounds i32, ptr %A, i64 %30
+  %31 = load i32, ptr %arrayidx58, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %if.then, %if.then30, %if.else43, %if.then46, %if.then14

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll
index 88a7dc849ed04..4f2d083c48568 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll
@@ -3,88 +3,73 @@
 
 
 
-define i32 @jumbled-load(i32* noalias nocapture %in, i32* noalias nocapture %inn, i32* noalias nocapture %out) {
+define i32 @jumbled-load(ptr noalias nocapture %in, ptr noalias nocapture %inn, ptr noalias nocapture %out) {
 ; CHECK-LABEL: @jumbled-load(
-; CHECK-NEXT:    [[IN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 0
-; CHECK-NEXT:    [[INN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[INN:%.*]], i64 0
-; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[INN_ADDR]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[IN:%.*]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[INN:%.*]], align 4
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul <4 x i32> [[TMP2]], [[SHUFFLE]]
 ; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[SHUFFLE1]], <4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    store <4 x i32> [[SHUFFLE1]], ptr [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret i32 undef
 ;
-  %in.addr = getelementptr inbounds i32, i32* %in, i64 0
-  %load.1 = load i32, i32* %in.addr, align 4
-  %gep.1 = getelementptr inbounds i32, i32* %in.addr, i64 3
-  %load.2 = load i32, i32* %gep.1, align 4
-  %gep.2 = getelementptr inbounds i32, i32* %in.addr, i64 1
-  %load.3 = load i32, i32* %gep.2, align 4
-  %gep.3 = getelementptr inbounds i32, i32* %in.addr, i64 2
-  %load.4 = load i32, i32* %gep.3, align 4
-  %inn.addr = getelementptr inbounds i32, i32* %inn, i64 0
-  %load.5 = load i32, i32* %inn.addr, align 4
-  %gep.4 = getelementptr inbounds i32, i32* %inn.addr, i64 2
-  %load.6 = load i32, i32* %gep.4, align 4
-  %gep.5 = getelementptr inbounds i32, i32* %inn.addr, i64 3
-  %load.7 = load i32, i32* %gep.5, align 4
-  %gep.6 = getelementptr inbounds i32, i32* %inn.addr, i64 1
-  %load.8 = load i32, i32* %gep.6, align 4
+  %load.1 = load i32, ptr %in, align 4
+  %gep.1 = getelementptr inbounds i32, ptr %in, i64 3
+  %load.2 = load i32, ptr %gep.1, align 4
+  %gep.2 = getelementptr inbounds i32, ptr %in, i64 1
+  %load.3 = load i32, ptr %gep.2, align 4
+  %gep.3 = getelementptr inbounds i32, ptr %in, i64 2
+  %load.4 = load i32, ptr %gep.3, align 4
+  %load.5 = load i32, ptr %inn, align 4
+  %gep.4 = getelementptr inbounds i32, ptr %inn, i64 2
+  %load.6 = load i32, ptr %gep.4, align 4
+  %gep.5 = getelementptr inbounds i32, ptr %inn, i64 3
+  %load.7 = load i32, ptr %gep.5, align 4
+  %gep.6 = getelementptr inbounds i32, ptr %inn, i64 1
+  %load.8 = load i32, ptr %gep.6, align 4
   %mul.1 = mul i32 %load.3, %load.5
   %mul.2 = mul i32 %load.2, %load.8
   %mul.3 = mul i32 %load.4, %load.7
   %mul.4 = mul i32 %load.1, %load.6
-  %gep.7 = getelementptr inbounds i32, i32* %out, i64 0
-  store i32 %mul.1, i32* %gep.7, align 4
-  %gep.8 = getelementptr inbounds i32, i32* %out, i64 1
-  store i32 %mul.2, i32* %gep.8, align 4
-  %gep.9 = getelementptr inbounds i32, i32* %out, i64 2
-  store i32 %mul.3, i32* %gep.9, align 4
-  %gep.10 = getelementptr inbounds i32, i32* %out, i64 3
-  store i32 %mul.4, i32* %gep.10, align 4
+  store i32 %mul.1, ptr %out, align 4
+  %gep.8 = getelementptr inbounds i32, ptr %out, i64 1
+  store i32 %mul.2, ptr %gep.8, align 4
+  %gep.9 = getelementptr inbounds i32, ptr %out, i64 2
+  store i32 %mul.3, ptr %gep.9, align 4
+  %gep.10 = getelementptr inbounds i32, ptr %out, i64 3
+  store i32 %mul.4, ptr %gep.10, align 4
 
   ret i32 undef
 }
 
 
-define i32 @jumbled-load-multiuses(i32* noalias nocapture %in, i32* noalias nocapture %out) {
+define i32 @jumbled-load-multiuses(ptr noalias nocapture %in, ptr noalias nocapture %out) {
 ; CHECK-LABEL: @jumbled-load-multiuses(
-; CHECK-NEXT:    [[IN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 0
-; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[IN:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 3>
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul <4 x i32> [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP5]], align 4
+; CHECK-NEXT:    store <4 x i32> [[SHUFFLE]], ptr [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret i32 undef
 ;
-  %in.addr = getelementptr inbounds i32, i32* %in, i64 0
-  %load.1 = load i32, i32* %in.addr, align 4
-  %gep.1 = getelementptr inbounds i32, i32* %in.addr, i64 3
-  %load.2 = load i32, i32* %gep.1, align 4
-  %gep.2 = getelementptr inbounds i32, i32* %in.addr, i64 1
-  %load.3 = load i32, i32* %gep.2, align 4
-  %gep.3 = getelementptr inbounds i32, i32* %in.addr, i64 2
-  %load.4 = load i32, i32* %gep.3, align 4
+  %load.1 = load i32, ptr %in, align 4
+  %gep.1 = getelementptr inbounds i32, ptr %in, i64 3
+  %load.2 = load i32, ptr %gep.1, align 4
+  %gep.2 = getelementptr inbounds i32, ptr %in, i64 1
+  %load.3 = load i32, ptr %gep.2, align 4
+  %gep.3 = getelementptr inbounds i32, ptr %in, i64 2
+  %load.4 = load i32, ptr %gep.3, align 4
   %mul.1 = mul i32 %load.3, %load.4
   %mul.2 = mul i32 %load.2, %load.2
   %mul.3 = mul i32 %load.4, %load.1
   %mul.4 = mul i32 %load.1, %load.3
-  %gep.7 = getelementptr inbounds i32, i32* %out, i64 0
-  store i32 %mul.1, i32* %gep.7, align 4
-  %gep.8 = getelementptr inbounds i32, i32* %out, i64 1
-  store i32 %mul.2, i32* %gep.8, align 4
-  %gep.9 = getelementptr inbounds i32, i32* %out, i64 2
-  store i32 %mul.3, i32* %gep.9, align 4
-  %gep.10 = getelementptr inbounds i32, i32* %out, i64 3
-  store i32 %mul.4, i32* %gep.10, align 4
+  store i32 %mul.1, ptr %out, align 4
+  %gep.8 = getelementptr inbounds i32, ptr %out, i64 1
+  store i32 %mul.2, ptr %gep.8, align 4
+  %gep.9 = getelementptr inbounds i32, ptr %out, i64 2
+  store i32 %mul.3, ptr %gep.9, align 4
+  %gep.10 = getelementptr inbounds i32, ptr %out, i64 3
+  store i32 %mul.4, ptr %gep.10, align 4
 
   ret i32 undef
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll
index 439c0b45e1054..6e67c412d9a43 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -o - -S < %s | FileCheck %s
 
-@b = common dso_local global i32* null, align 8
+@b = common dso_local global ptr null, align 8
 @e = common dso_local global float 0.000000e+00, align 4
 @c = common dso_local global float 0.000000e+00, align 4
 @g = common dso_local global float 0.000000e+00, align 4
@@ -13,31 +13,29 @@
 define dso_local void @j() local_unnamed_addr {
 ; CHECK-LABEL: @j(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** @b, align 8
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 4
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 12
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @a, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr @b, align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 12
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @a, align 4
 ; CHECK-NEXT:    [[CONV19:%.*]] = sitofp i32 [[TMP1]] to float
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[ARRAYIDX]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[TMP2]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[ARRAYIDX1]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX1]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <2 x i32> [[TMP5]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sitofp <2 x i32> [[TMP6]] to <2 x float>
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x float> [[TMP7]], <float 1.000000e+01, float 1.000000e+01>
 ; CHECK-NEXT:    [[TMP9:%.*]] = fsub <2 x float> <float 1.000000e+00, float 0.000000e+00>, [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x float> [[TMP10]], i32 1
-; CHECK-NEXT:    store float [[TMP11]], float* @g, align 4
+; CHECK-NEXT:    store float [[TMP11]], ptr @g, align 4
 ; CHECK-NEXT:    [[TMP12:%.*]] = fadd <4 x float> [[TMP10]], <float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00>
 ; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x float> [[TMP12]], i32 2
-; CHECK-NEXT:    store float [[TMP13]], float* @c, align 4
+; CHECK-NEXT:    store float [[TMP13]], ptr @c, align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x float> [[TMP12]], i32 0
-; CHECK-NEXT:    store float [[TMP14]], float* @d, align 4
+; CHECK-NEXT:    store float [[TMP14]], ptr @d, align 4
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x float> [[TMP12]], i32 3
-; CHECK-NEXT:    store float [[TMP15]], float* @e, align 4
+; CHECK-NEXT:    store float [[TMP15]], ptr @e, align 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x float> [[TMP12]], i32 1
-; CHECK-NEXT:    store float [[TMP16]], float* @f, align 4
+; CHECK-NEXT:    store float [[TMP16]], ptr @f, align 4
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x float> <float poison, float -1.000000e+00, float poison, float -1.000000e+00>, float [[CONV19]], i32 0
 ; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <4 x float> [[TMP17]], <4 x float> [[TMP18]], <4 x i32> <i32 0, i32 1, i32 5, i32 3>
@@ -45,52 +43,51 @@ define dso_local void @j() local_unnamed_addr {
 ; CHECK-NEXT:    [[TMP21:%.*]] = fadd <4 x float> [[TMP12]], [[TMP19]]
 ; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <4 x float> [[TMP20]], <4 x float> [[TMP21]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
 ; CHECK-NEXT:    [[TMP23:%.*]] = fptosi <4 x float> [[TMP22]] to <4 x i32>
-; CHECK-NEXT:    [[TMP24:%.*]] = bitcast i32* [[ARRAYIDX1]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP23]], <4 x i32>* [[TMP24]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP23]], ptr [[ARRAYIDX1]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load i32*, i32** @b, align 8
-  %arrayidx = getelementptr inbounds i32, i32* %0, i64 4
-  %1 = load i32, i32* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 12
-  %2 = load i32, i32* %arrayidx1, align 4
+  %0 = load ptr, ptr @b, align 8
+  %arrayidx = getelementptr inbounds i32, ptr %0, i64 4
+  %1 = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %0, i64 12
+  %2 = load i32, ptr %arrayidx1, align 4
   %add = add nsw i32 %2, %1
   %conv = sitofp i32 %add to float
   %mul = fmul float %conv, 1.000000e+01
-  %arrayidx2 = getelementptr inbounds i32, i32* %0, i64 5
-  %3 = load i32, i32* %arrayidx2, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32* %0, i64 13
-  %4 = load i32, i32* %arrayidx3, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %0, i64 5
+  %3 = load i32, ptr %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %0, i64 13
+  %4 = load i32, ptr %arrayidx3, align 4
   %add4 = add nsw i32 %4, %3
   %conv5 = sitofp i32 %add4 to float
   %mul6 = fmul float %conv5, 1.000000e+01
   %sub = fsub float 0.000000e+00, %mul6
   %sub7 = fsub float 1.000000e+00, %mul
-  store float %sub7, float* @g, align 4
+  store float %sub7, ptr @g, align 4
   %add9 = fadd float %sub, 1.000000e+00
-  store float %add9, float* @c, align 4
+  store float %add9, ptr @c, align 4
   %sub10 = fadd float %sub, -1.000000e+00
-  store float %sub10, float* @d, align 4
+  store float %sub10, ptr @d, align 4
   %add11 = fadd float %sub7, 1.000000e+00
-  store float %add11, float* @e, align 4
+  store float %add11, ptr @e, align 4
   %sub12 = fadd float %sub7, -1.000000e+00
-  store float %sub12, float* @f, align 4
+  store float %sub12, ptr @f, align 4
   %sub13 = fsub float %add9, %sub
   %conv14 = fptosi float %sub13 to i32
-  %arrayidx15 = getelementptr inbounds i32, i32* %0, i64 14
-  store i32 %conv14, i32* %arrayidx15, align 4
+  %arrayidx15 = getelementptr inbounds i32, ptr %0, i64 14
+  store i32 %conv14, ptr %arrayidx15, align 4
   %sub16 = fadd float %add11, -1.000000e+00
   %conv17 = fptosi float %sub16 to i32
-  %arrayidx18 = getelementptr inbounds i32, i32* %0, i64 15
-  store i32 %conv17, i32* %arrayidx18, align 4
-  %5 = load i32, i32* @a, align 4
+  %arrayidx18 = getelementptr inbounds i32, ptr %0, i64 15
+  store i32 %conv17, ptr %arrayidx18, align 4
+  %5 = load i32, ptr @a, align 4
   %conv19 = sitofp i32 %5 to float
   %sub20 = fsub float %sub10, %conv19
   %conv21 = fptosi float %sub20 to i32
-  store i32 %conv21, i32* %arrayidx1, align 4
+  store i32 %conv21, ptr %arrayidx1, align 4
   %sub23 = fadd float %sub12, -1.000000e+00
   %conv24 = fptosi float %sub23 to i32
-  store i32 %conv24, i32* %arrayidx3, align 4
+  store i32 %conv24, ptr %arrayidx3, align 4
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll
index 539402134663b..a6e41cad8c292 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll
@@ -3,7 +3,7 @@
 
 target triple = "x86_64-unknown-linux-gnu"
 
-define void @foo() personality i32* ()* @bar {
+define void @foo() personality ptr @bar {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  bb1:
 ; CHECK-NEXT:    br label [[BB3:%.*]]
@@ -14,7 +14,7 @@ define void @foo() personality i32* ()* @bar {
 ; CHECK-NEXT:    ret void
 ; CHECK:       bb3:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ [[TMP5:%.*]], [[BB6:%.*]] ], [ poison, [[BB1:%.*]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = invoke i32 poison(i8 addrspace(1)* nonnull poison, i32 0, i32 0, i32 poison) [ "deopt"() ]
+; CHECK-NEXT:    [[TMP2:%.*]] = invoke i32 poison(ptr addrspace(1) nonnull poison, i32 0, i32 0, i32 poison) [ "deopt"() ]
 ; CHECK-NEXT:    to label [[BB4:%.*]] unwind label [[BB10:%.*]]
 ; CHECK:       bb4:
 ; CHECK-NEXT:    br i1 poison, label [[BB11:%.*]], label [[BB5:%.*]]
@@ -28,7 +28,7 @@ define void @foo() personality i32* ()* @bar {
 ; CHECK:       bb7:
 ; CHECK-NEXT:    [[LOCAL_5_84111:%.*]] = phi i32 [ poison, [[BB8]] ], [ poison, [[BB5]] ]
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[LOCAL_5_84111]], i32 1
-; CHECK-NEXT:    [[TMP7:%.*]] = invoke i32 poison(i8 addrspace(1)* nonnull poison, i32 poison, i32 poison, i32 poison) [ "deopt"() ]
+; CHECK-NEXT:    [[TMP7:%.*]] = invoke i32 poison(ptr addrspace(1) nonnull poison, i32 poison, i32 poison, i32 poison) [ "deopt"() ]
 ; CHECK-NEXT:    to label [[BB8]] unwind label [[BB12:%.*]]
 ; CHECK:       bb8:
 ; CHECK-NEXT:    br i1 poison, label [[BB7]], label [[BB6]]
@@ -40,7 +40,7 @@ define void @foo() personality i32* ()* @bar {
 ; CHECK-NEXT:    br label [[BB2]]
 ; CHECK:       bb10:
 ; CHECK-NEXT:    [[TMP10:%.*]] = phi <2 x i32> [ [[TMP1]], [[BB3]] ]
-; CHECK-NEXT:    [[LANDING_PAD68:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    [[LANDING_PAD68:%.*]] = landingpad { ptr, i32 }
 ; CHECK-NEXT:    cleanup
 ; CHECK-NEXT:    [[SHUFFLE1]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
 ; CHECK-NEXT:    br label [[BB9]]
@@ -48,7 +48,7 @@ define void @foo() personality i32* ()* @bar {
 ; CHECK-NEXT:    ret void
 ; CHECK:       bb12:
 ; CHECK-NEXT:    [[TMP11]] = phi <2 x i32> [ [[TMP6]], [[BB7]] ]
-; CHECK-NEXT:    [[LANDING_PAD149:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    [[LANDING_PAD149:%.*]] = landingpad { ptr, i32 }
 ; CHECK-NEXT:    cleanup
 ; CHECK-NEXT:    br label [[BB9]]
 ;
@@ -68,7 +68,7 @@ bb2:
 bb3:
   %local_10_38123 = phi i32 [ %.lcssa773, %bb6 ], [ poison, %bb1 ]
   %local_5_33118 = phi i32 [ poison, %bb6 ], [ poison, %bb1 ]
-  %0 = invoke i32 poison(i8 addrspace(1)* nonnull poison, i32 0, i32 0, i32 poison) [ "deopt"() ]
+  %0 = invoke i32 poison(ptr addrspace(1) nonnull poison, i32 0, i32 0, i32 poison) [ "deopt"() ]
   to label %bb4 unwind label %bb10
 
 bb4:
@@ -84,7 +84,7 @@ bb6:
 
 bb7:
   %local_5_84111 = phi i32 [ poison, %bb8 ], [ poison, %bb5 ]
-  %1 = invoke i32 poison(i8 addrspace(1)* nonnull poison, i32 poison, i32 poison, i32 poison) [ "deopt"() ]
+  %1 = invoke i32 poison(ptr addrspace(1) nonnull poison, i32 poison, i32 poison, i32 poison) [ "deopt"() ]
   to label %bb8 unwind label %bb12
 
 bb8:
@@ -99,7 +99,7 @@ bb9:
 bb10:
   %local_10_38123.lcssa = phi i32 [ %local_10_38123, %bb3 ]
   %local_5_33118.lcssa = phi i32 [ %local_5_33118, %bb3 ]
-  %landing_pad68 = landingpad { i8*, i32 }
+  %landing_pad68 = landingpad { ptr, i32 }
   cleanup
   br label %bb9
 
@@ -109,11 +109,11 @@ bb11:
 bb12:
   %local_10_89113.lcssa = phi i32 [ poison, %bb7 ]
   %local_5_84111.lcssa = phi i32 [ %local_5_84111, %bb7 ]
-  %landing_pad149 = landingpad { i8*, i32 }
+  %landing_pad149 = landingpad { ptr, i32 }
   cleanup
   br label %bb9
 }
 
-declare i32* @bar()
+declare ptr @bar()
 
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/limit.ll b/llvm/test/Transforms/SLPVectorizer/X86/limit.ll
index 82861e1717d70..78ccd17608daf 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/limit.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/limit.ll
@@ -19,53 +19,53 @@ define void @addsub() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[BB1:%.*]]
 ; CHECK:       bb1:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @b to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @c to <4 x i32>*), align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr @b, align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @c, align 16
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[TMP0]], [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @d to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @e to <4 x i32>*), align 16
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr @d, align 16
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr @e, align 16
 ; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[TMP2]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub nsw <4 x i32> [[TMP2]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 16
+; CHECK-NEXT:    store <4 x i32> [[TMP8]], ptr @a, align 16
 ; CHECK-NEXT:    ret void
 ;
 entry:
   br label %bb1
 
 bb1:
-  %0 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i64 0), align 16
-  %1 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i64 0, i64 0), align 16
+  %0 = load i32, ptr @b, align 16
+  %1 = load i32, ptr @c, align 16
   %add = add nsw i32 %0, %1
-  %2 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i64 0, i64 0), align 16
-  %3 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i64 0, i64 0), align 16
+  %2 = load i32, ptr @d, align 16
+  %3 = load i32, ptr @e, align 16
   %add1 = add nsw i32 %2, %3
   %add2 = add nsw i32 %add, %add1
-  store i32 %add2, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 0), align 16
-  %4 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i64 1), align 4
-  %5 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i64 0, i64 1), align 4
+  store i32 %add2, ptr @a, align 16
+  %4 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i64 0, i64 1), align 4
+  %5 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i64 0, i64 1), align 4
   %add3 = add nsw i32 %4, %5
-  %6 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i64 0, i64 1), align 4
-  %7 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i64 0, i64 1), align 4
+  %6 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i64 0, i64 1), align 4
+  %7 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i64 0, i64 1), align 4
   %add4 = add nsw i32 %6, %7
   %sub = sub nsw i32 %add3, %add4
-  store i32 %sub, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 1), align 4
-  %8 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i64 2), align 8
-  %9 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i64 0, i64 2), align 8
+  store i32 %sub, ptr getelementptr inbounds ([4 x i32], ptr @a, i64 0, i64 1), align 4
+  %8 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i64 0, i64 2), align 8
+  %9 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i64 0, i64 2), align 8
   %add5 = add nsw i32 %8, %9
-  %10 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i64 0, i64 2), align 8
-  %11 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i64 0, i64 2), align 8
+  %10 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i64 0, i64 2), align 8
+  %11 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i64 0, i64 2), align 8
   %add6 = add nsw i32 %10, %11
   %add7 = add nsw i32 %add5, %add6
-  store i32 %add7, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 2), align 8
-  %12 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i64 3), align 4
-  %13 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i64 0, i64 3), align 4
+  store i32 %add7, ptr getelementptr inbounds ([4 x i32], ptr @a, i64 0, i64 2), align 8
+  %12 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i64 0, i64 3), align 4
+  %13 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i64 0, i64 3), align 4
   %add8 = add nsw i32 %12, %13
-  %14 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i64 0, i64 3), align 4
-  %15 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i64 0, i64 3), align 4
+  %14 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i64 0, i64 3), align 4
+  %15 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i64 0, i64 3), align 4
   %add9 = add nsw i32 %14, %15
   %sub10 = sub nsw i32 %add8, %add9
-  store i32 %sub10, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 3), align 4
+  store i32 %sub10, ptr getelementptr inbounds ([4 x i32], ptr @a, i64 0, i64 3), align 4
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-bitcast-vec.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-bitcast-vec.ll
index 7456cad2dc155..f724022faba34 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/load-bitcast-vec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/load-bitcast-vec.ll
@@ -2,101 +2,83 @@
 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-- -mattr=+avx  | FileCheck %s
 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s
 
-define float @matching_scalar(<4 x float>* dereferenceable(16) %p) {
+define float @matching_scalar(ptr dereferenceable(16) %p) {
 ; CHECK-LABEL: @matching_scalar(
-; CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x float>* [[P:%.*]] to float*
-; CHECK-NEXT:    [[R:%.*]] = load float, float* [[BC]], align 16
+; CHECK-NEXT:    [[R:%.*]] = load float, ptr [[P:%.*]], align 16
 ; CHECK-NEXT:    ret float [[R]]
 ;
-  %bc = bitcast <4 x float>* %p to float*
-  %r = load float, float* %bc, align 16
+  %r = load float, ptr %p, align 16
   ret float %r
 }
 
-define i32 @nonmatching_scalar(<4 x float>* dereferenceable(16) %p) {
+define i32 @nonmatching_scalar(ptr dereferenceable(16) %p) {
 ; CHECK-LABEL: @nonmatching_scalar(
-; CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x float>* [[P:%.*]] to i32*
-; CHECK-NEXT:    [[R:%.*]] = load i32, i32* [[BC]], align 16
+; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[P:%.*]], align 16
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
-  %bc = bitcast <4 x float>* %p to i32*
-  %r = load i32, i32* %bc, align 16
+  %r = load i32, ptr %p, align 16
   ret i32 %r
 }
 
-define i64 @larger_scalar(<4 x float>* dereferenceable(16) %p) {
+define i64 @larger_scalar(ptr dereferenceable(16) %p) {
 ; CHECK-LABEL: @larger_scalar(
-; CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x float>* [[P:%.*]] to i64*
-; CHECK-NEXT:    [[R:%.*]] = load i64, i64* [[BC]], align 16
+; CHECK-NEXT:    [[R:%.*]] = load i64, ptr [[P:%.*]], align 16
 ; CHECK-NEXT:    ret i64 [[R]]
 ;
-  %bc = bitcast <4 x float>* %p to i64*
-  %r = load i64, i64* %bc, align 16
+  %r = load i64, ptr %p, align 16
   ret i64 %r
 }
 
-define i8 @smaller_scalar(<4 x float>* dereferenceable(16) %p) {
+define i8 @smaller_scalar(ptr dereferenceable(16) %p) {
 ; CHECK-LABEL: @smaller_scalar(
-; CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x float>* [[P:%.*]] to i8*
-; CHECK-NEXT:    [[R:%.*]] = load i8, i8* [[BC]], align 16
+; CHECK-NEXT:    [[R:%.*]] = load i8, ptr [[P:%.*]], align 16
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
-  %bc = bitcast <4 x float>* %p to i8*
-  %r = load i8, i8* %bc, align 16
+  %r = load i8, ptr %p, align 16
   ret i8 %r
 }
 
-define i8 @smaller_scalar_256bit_vec(<8 x float>* dereferenceable(32) %p) {
+define i8 @smaller_scalar_256bit_vec(ptr dereferenceable(32) %p) {
 ; CHECK-LABEL: @smaller_scalar_256bit_vec(
-; CHECK-NEXT:    [[BC:%.*]] = bitcast <8 x float>* [[P:%.*]] to i8*
-; CHECK-NEXT:    [[R:%.*]] = load i8, i8* [[BC]], align 32
+; CHECK-NEXT:    [[R:%.*]] = load i8, ptr [[P:%.*]], align 32
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
-  %bc = bitcast <8 x float>* %p to i8*
-  %r = load i8, i8* %bc, align 32
+  %r = load i8, ptr %p, align 32
   ret i8 %r
 }
 
-define i8 @smaller_scalar_less_aligned(<4 x float>* dereferenceable(16) %p) {
+define i8 @smaller_scalar_less_aligned(ptr dereferenceable(16) %p) {
 ; CHECK-LABEL: @smaller_scalar_less_aligned(
-; CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x float>* [[P:%.*]] to i8*
-; CHECK-NEXT:    [[R:%.*]] = load i8, i8* [[BC]], align 4
+; CHECK-NEXT:    [[R:%.*]] = load i8, ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
-  %bc = bitcast <4 x float>* %p to i8*
-  %r = load i8, i8* %bc, align 4
+  %r = load i8, ptr %p, align 4
   ret i8 %r
 }
 
-define float @matching_scalar_small_deref(<4 x float>* dereferenceable(15) %p) {
+define float @matching_scalar_small_deref(ptr dereferenceable(15) %p) {
 ; CHECK-LABEL: @matching_scalar_small_deref(
-; CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x float>* [[P:%.*]] to float*
-; CHECK-NEXT:    [[R:%.*]] = load float, float* [[BC]], align 16
+; CHECK-NEXT:    [[R:%.*]] = load float, ptr [[P:%.*]], align 16
 ; CHECK-NEXT:    ret float [[R]]
 ;
-  %bc = bitcast <4 x float>* %p to float*
-  %r = load float, float* %bc, align 16
+  %r = load float, ptr %p, align 16
   ret float %r
 }
 
-define float @matching_scalar_volatile(<4 x float>* dereferenceable(16) %p) {
+define float @matching_scalar_volatile(ptr dereferenceable(16) %p) {
 ; CHECK-LABEL: @matching_scalar_volatile(
-; CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x float>* [[P:%.*]] to float*
-; CHECK-NEXT:    [[R:%.*]] = load volatile float, float* [[BC]], align 16
+; CHECK-NEXT:    [[R:%.*]] = load volatile float, ptr [[P:%.*]], align 16
 ; CHECK-NEXT:    ret float [[R]]
 ;
-  %bc = bitcast <4 x float>* %p to float*
-  %r = load volatile float, float* %bc, align 16
+  %r = load volatile float, ptr %p, align 16
   ret float %r
 }
 
-define float @nonvector(double* dereferenceable(16) %p) {
+define float @nonvector(ptr dereferenceable(16) %p) {
 ; CHECK-LABEL: @nonvector(
-; CHECK-NEXT:    [[BC:%.*]] = bitcast double* [[P:%.*]] to float*
-; CHECK-NEXT:    [[R:%.*]] = load float, float* [[BC]], align 16
+; CHECK-NEXT:    [[R:%.*]] = load float, ptr [[P:%.*]], align 16
 ; CHECK-NEXT:    ret float [[R]]
 ;
-  %bc = bitcast double* %p to float*
-  %r = load float, float* %bc, align 16
+  %r = load float, ptr %p, align 16
   ret float %r
 }

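Every hunk in load-bitcast-vec.ll above is the same mechanical rewrite: under opaque pointers a pointer-to-pointer bitcast carries no information, so it is deleted and the access type is expressed by the memory instruction instead. A minimal before/after sketch of the shape (hypothetical functions for illustration, not lifted from the commit):

    ; Typed pointers (before): the access type lives in the pointer type,
    ; so reading a float through a <4 x float>* needs an explicit cast.
    ; (hypothetical function, for illustration only)
    define float @sketch_typed(<4 x float>* %p) {
      %bc = bitcast <4 x float>* %p to float*
      %r = load float, float* %bc, align 16
      ret float %r
    }

    ; Opaque pointers (after): the cast disappears entirely; the load
    ; itself states the type being read.
    define float @sketch_opaque(ptr %p) {
      %r = load float, ptr %p, align 16
      ret float %r
    }

The CHECK lines shrink accordingly, one [[BC]] line per deleted bitcast; the vectorizer's own <2 x float>* and <2 x i64>* casts in the files that follow vanish the same way.
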
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll
index 21ff6e79c7cd0..230718d3d5b4f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll
@@ -6,67 +6,64 @@
 ;    return le32;
 ;}
 
-define i32 @_Z9load_le32Ph(i8* nocapture readonly %data) {
+define i32 @_Z9load_le32Ph(ptr nocapture readonly %data) {
 ; CHECK-LABEL: @_Z9load_le32Ph(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[DATA:%.*]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[DATA:%.*]], align 1
 ; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP0]] to i32
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[DATA]], i64 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DATA]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
 ; CHECK-NEXT:    [[CONV2:%.*]] = zext i8 [[TMP1]] to i32
 ; CHECK-NEXT:    [[SHL3:%.*]] = shl nuw nsw i32 [[CONV2]], 8
 ; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHL3]], [[CONV]]
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[DATA]], i64 2
-; CHECK-NEXT:    [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX4]], align 1
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[DATA]], i64 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1
 ; CHECK-NEXT:    [[CONV5:%.*]] = zext i8 [[TMP2]] to i32
 ; CHECK-NEXT:    [[SHL6:%.*]] = shl nuw nsw i32 [[CONV5]], 16
 ; CHECK-NEXT:    [[OR7:%.*]] = or i32 [[OR]], [[SHL6]]
-; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, i8* [[DATA]], i64 3
-; CHECK-NEXT:    [[TMP3:%.*]] = load i8, i8* [[ARRAYIDX8]], align 1
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[DATA]], i64 3
+; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX8]], align 1
 ; CHECK-NEXT:    [[CONV9:%.*]] = zext i8 [[TMP3]] to i32
 ; CHECK-NEXT:    [[SHL10:%.*]] = shl nuw i32 [[CONV9]], 24
 ; CHECK-NEXT:    [[OR11:%.*]] = or i32 [[OR7]], [[SHL10]]
 ; CHECK-NEXT:    ret i32 [[OR11]]
 ;
 entry:
-  %0 = load i8, i8* %data, align 1
+  %0 = load i8, ptr %data, align 1
   %conv = zext i8 %0 to i32
-  %arrayidx1 = getelementptr inbounds i8, i8* %data, i64 1
-  %1 = load i8, i8* %arrayidx1, align 1
+  %arrayidx1 = getelementptr inbounds i8, ptr %data, i64 1
+  %1 = load i8, ptr %arrayidx1, align 1
   %conv2 = zext i8 %1 to i32
   %shl3 = shl nuw nsw i32 %conv2, 8
   %or = or i32 %shl3, %conv
-  %arrayidx4 = getelementptr inbounds i8, i8* %data, i64 2
-  %2 = load i8, i8* %arrayidx4, align 1
+  %arrayidx4 = getelementptr inbounds i8, ptr %data, i64 2
+  %2 = load i8, ptr %arrayidx4, align 1
   %conv5 = zext i8 %2 to i32
   %shl6 = shl nuw nsw i32 %conv5, 16
   %or7 = or i32 %or, %shl6
-  %arrayidx8 = getelementptr inbounds i8, i8* %data, i64 3
-  %3 = load i8, i8* %arrayidx8, align 1
+  %arrayidx8 = getelementptr inbounds i8, ptr %data, i64 3
+  %3 = load i8, ptr %arrayidx8, align 1
   %conv9 = zext i8 %3 to i32
   %shl10 = shl nuw i32 %conv9, 24
   %or11 = or i32 %or7, %shl10
   ret i32 %or11
 }
 
-define <4 x float> @PR16739_byref(<4 x float>* nocapture readonly dereferenceable(16) %x) {
+define <4 x float> @PR16739_byref(ptr nocapture readonly dereferenceable(16) %x) {
 ; CHECK-LABEL: @PR16739_byref(
-; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X:%.*]], i64 0, i64 0
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 2
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[GEP0]] to <2 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
-; CHECK-NEXT:    [[X2:%.*]] = load float, float* [[GEP2]], align 4
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds <4 x float>, ptr [[X:%.*]], i64 0, i64 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[X]], align 4
+; CHECK-NEXT:    [[X2:%.*]] = load float, ptr [[GEP2]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[I2:%.*]] = insertelement <4 x float> [[TMP3]], float [[X2]], i32 2
 ; CHECK-NEXT:    [[I3:%.*]] = insertelement <4 x float> [[I2]], float [[X2]], i32 3
 ; CHECK-NEXT:    ret <4 x float> [[I3]]
 ;
-  %gep0 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 0
-  %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 1
-  %gep2 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 2
-  %x0 = load float, float* %gep0
-  %x1 = load float, float* %gep1
-  %x2 = load float, float* %gep2
+  %gep1 = getelementptr inbounds <4 x float>, ptr %x, i64 0, i64 1
+  %gep2 = getelementptr inbounds <4 x float>, ptr %x, i64 0, i64 2
+  %x0 = load float, ptr %x
+  %x1 = load float, ptr %gep1
+  %x2 = load float, ptr %gep2
   %i0 = insertelement <4 x float> poison, float %x0, i32 0
   %i1 = insertelement <4 x float> %i0, float %x1, i32 1
   %i2 = insertelement <4 x float> %i1, float %x2, i32 2
@@ -74,18 +71,15 @@ define <4 x float> @PR16739_byref(<4 x float>* nocapture readonly dereferenceabl
   ret <4 x float> %i3
 }
 
-define <4 x float> @PR16739_byref_alt(<4 x float>* nocapture readonly dereferenceable(16) %x) {
+define <4 x float> @PR16739_byref_alt(ptr nocapture readonly dereferenceable(16) %x) {
 ; CHECK-LABEL: @PR16739_byref_alt(
-; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X:%.*]], i64 0, i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[GEP0]] to <2 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[X:%.*]], align 4
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
 ; CHECK-NEXT:    ret <4 x float> [[SHUFFLE]]
 ;
-  %gep0 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 0
-  %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 1
-  %x0 = load float, float* %gep0
-  %x1 = load float, float* %gep1
+  %gep1 = getelementptr inbounds <4 x float>, ptr %x, i64 0, i64 1
+  %x0 = load float, ptr %x
+  %x1 = load float, ptr %gep1
   %i0 = insertelement <4 x float> poison, float %x0, i32 0
   %i1 = insertelement <4 x float> %i0, float %x0, i32 1
   %i2 = insertelement <4 x float> %i1, float %x1, i32 2
@@ -93,13 +87,11 @@ define <4 x float> @PR16739_byref_alt(<4 x float>* nocapture readonly dereferenc
   ret <4 x float> %i3
 }
 
-define <4 x float> @PR16739_byval(<4 x float>* nocapture readonly dereferenceable(16) %x) {
+define <4 x float> @PR16739_byval(ptr nocapture readonly dereferenceable(16) %x) {
 ; CHECK-LABEL: @PR16739_byval(
-; CHECK-NEXT:    [[T0:%.*]] = bitcast <4 x float>* [[X:%.*]] to i64*
-; CHECK-NEXT:    [[T1:%.*]] = load i64, i64* [[T0]], align 16
-; CHECK-NEXT:    [[T2:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 2
-; CHECK-NEXT:    [[T3:%.*]] = bitcast float* [[T2]] to i64*
-; CHECK-NEXT:    [[T4:%.*]] = load i64, i64* [[T3]], align 8
+; CHECK-NEXT:    [[T1:%.*]] = load i64, ptr [[X:%.*]], align 16
+; CHECK-NEXT:    [[T2:%.*]] = getelementptr inbounds <4 x float>, ptr [[X]], i64 0, i64 2
+; CHECK-NEXT:    [[T4:%.*]] = load i64, ptr [[T2]], align 8
 ; CHECK-NEXT:    [[T5:%.*]] = trunc i64 [[T1]] to i32
 ; CHECK-NEXT:    [[T6:%.*]] = bitcast i32 [[T5]] to float
 ; CHECK-NEXT:    [[T7:%.*]] = insertelement <4 x float> poison, float [[T6]], i32 0
@@ -113,11 +105,9 @@ define <4 x float> @PR16739_byval(<4 x float>* nocapture readonly dereferenceabl
 ; CHECK-NEXT:    [[T15:%.*]] = insertelement <4 x float> [[T14]], float [[T13]], i32 3
 ; CHECK-NEXT:    ret <4 x float> [[T15]]
 ;
-  %t0 = bitcast <4 x float>* %x to i64*
-  %t1 = load i64, i64* %t0, align 16
-  %t2 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 2
-  %t3 = bitcast float* %t2 to i64*
-  %t4 = load i64, i64* %t3, align 8
+  %t1 = load i64, ptr %x, align 16
+  %t2 = getelementptr inbounds <4 x float>, ptr %x, i64 0, i64 2
+  %t4 = load i64, ptr %t2, align 8
   %t5 = trunc i64 %t1 to i32
   %t6 = bitcast i32 %t5 to float
   %t7 = insertelement <4 x float> poison, float %t6, i32 0
@@ -132,61 +122,53 @@ define <4 x float> @PR16739_byval(<4 x float>* nocapture readonly dereferenceabl
   ret <4 x float> %t15
 }
 
-define void @PR43578_prefer128(i32* %r, i64* %p, i64* %q) #0 {
+define void @PR43578_prefer128(ptr %r, ptr %p, ptr %q) #0 {
 ; CHECK-LABEL: @PR43578_prefer128(
-; CHECK-NEXT:    [[P0:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 0
-; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 2
-; CHECK-NEXT:    [[Q0:%.*]] = getelementptr inbounds i64, i64* [[Q:%.*]], i64 0
-; CHECK-NEXT:    [[Q2:%.*]] = getelementptr inbounds i64, i64* [[Q]], i64 2
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[P0]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 2
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[Q0]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 2
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 2
+; CHECK-NEXT:    [[Q2:%.*]] = getelementptr inbounds i64, ptr [[Q:%.*]], i64 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[P]], align 2
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr [[Q]], align 2
 ; CHECK-NEXT:    [[TMP5:%.*]] = sub nsw <2 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[P2]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[TMP6]], align 2
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64* [[Q2]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[TMP8]], align 2
+; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr [[P2]], align 2
+; CHECK-NEXT:    [[TMP9:%.*]] = load <2 x i64>, ptr [[Q2]], align 2
 ; CHECK-NEXT:    [[TMP10:%.*]] = sub nsw <2 x i64> [[TMP7]], [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
-; CHECK-NEXT:    [[G0:%.*]] = getelementptr inbounds i32, i32* [[R:%.*]], i64 [[TMP11]]
+; CHECK-NEXT:    [[G0:%.*]] = getelementptr inbounds i32, ptr [[R:%.*]], i64 [[TMP11]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
-; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds i32, i32* [[R]], i64 [[TMP12]]
+; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP12]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0
-; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds i32, i32* [[R]], i64 [[TMP13]]
+; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP13]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1
-; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds i32, i32* [[R]], i64 [[TMP14]]
+; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP14]]
 ; CHECK-NEXT:    ret void
 ;
-  %p0 = getelementptr inbounds i64, i64* %p, i64 0
-  %p1 = getelementptr inbounds i64, i64* %p, i64 1
-  %p2 = getelementptr inbounds i64, i64* %p, i64 2
-  %p3 = getelementptr inbounds i64, i64* %p, i64 3
+  %p1 = getelementptr inbounds i64, ptr %p, i64 1
+  %p2 = getelementptr inbounds i64, ptr %p, i64 2
+  %p3 = getelementptr inbounds i64, ptr %p, i64 3
 
-  %q0 = getelementptr inbounds i64, i64* %q, i64 0
-  %q1 = getelementptr inbounds i64, i64* %q, i64 1
-  %q2 = getelementptr inbounds i64, i64* %q, i64 2
-  %q3 = getelementptr inbounds i64, i64* %q, i64 3
+  %q1 = getelementptr inbounds i64, ptr %q, i64 1
+  %q2 = getelementptr inbounds i64, ptr %q, i64 2
+  %q3 = getelementptr inbounds i64, ptr %q, i64 3
 
-  %x0 = load i64, i64* %p0, align 2
-  %x1 = load i64, i64* %p1, align 2
-  %x2 = load i64, i64* %p2, align 2
-  %x3 = load i64, i64* %p3, align 2
+  %x0 = load i64, ptr %p, align 2
+  %x1 = load i64, ptr %p1, align 2
+  %x2 = load i64, ptr %p2, align 2
+  %x3 = load i64, ptr %p3, align 2
 
-  %y0 = load i64, i64* %q0, align 2
-  %y1 = load i64, i64* %q1, align 2
-  %y2 = load i64, i64* %q2, align 2
-  %y3 = load i64, i64* %q3, align 2
+  %y0 = load i64, ptr %q, align 2
+  %y1 = load i64, ptr %q1, align 2
+  %y2 = load i64, ptr %q2, align 2
+  %y3 = load i64, ptr %q3, align 2
 
   %sub0 = sub nsw i64 %x0, %y0
   %sub1 = sub nsw i64 %x1, %y1
   %sub2 = sub nsw i64 %x2, %y2
   %sub3 = sub nsw i64 %x3, %y3
 
-  %g0 = getelementptr inbounds i32, i32* %r, i64 %sub0
-  %g1 = getelementptr inbounds i32, i32* %r, i64 %sub1
-  %g2 = getelementptr inbounds i32, i32* %r, i64 %sub2
-  %g3 = getelementptr inbounds i32, i32* %r, i64 %sub3
+  %g0 = getelementptr inbounds i32, ptr %r, i64 %sub0
+  %g1 = getelementptr inbounds i32, ptr %r, i64 %sub1
+  %g2 = getelementptr inbounds i32, ptr %r, i64 %sub2
+  %g3 = getelementptr inbounds i32, ptr %r, i64 %sub3
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll
index 20534783b4ae9..abe35ef79f5b4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll
@@ -6,67 +6,64 @@
 ;    return le32;
 ;}
 
-define i32 @_Z9load_le32Ph(i8* nocapture readonly %data) {
+define i32 @_Z9load_le32Ph(ptr nocapture readonly %data) {
 ; CHECK-LABEL: @_Z9load_le32Ph(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[DATA:%.*]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[DATA:%.*]], align 1
 ; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP0]] to i32
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[DATA]], i64 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DATA]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
 ; CHECK-NEXT:    [[CONV2:%.*]] = zext i8 [[TMP1]] to i32
 ; CHECK-NEXT:    [[SHL3:%.*]] = shl nuw nsw i32 [[CONV2]], 8
 ; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHL3]], [[CONV]]
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[DATA]], i64 2
-; CHECK-NEXT:    [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX4]], align 1
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[DATA]], i64 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1
 ; CHECK-NEXT:    [[CONV5:%.*]] = zext i8 [[TMP2]] to i32
 ; CHECK-NEXT:    [[SHL6:%.*]] = shl nuw nsw i32 [[CONV5]], 16
 ; CHECK-NEXT:    [[OR7:%.*]] = or i32 [[OR]], [[SHL6]]
-; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, i8* [[DATA]], i64 3
-; CHECK-NEXT:    [[TMP3:%.*]] = load i8, i8* [[ARRAYIDX8]], align 1
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[DATA]], i64 3
+; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX8]], align 1
 ; CHECK-NEXT:    [[CONV9:%.*]] = zext i8 [[TMP3]] to i32
 ; CHECK-NEXT:    [[SHL10:%.*]] = shl nuw i32 [[CONV9]], 24
 ; CHECK-NEXT:    [[OR11:%.*]] = or i32 [[OR7]], [[SHL10]]
 ; CHECK-NEXT:    ret i32 [[OR11]]
 ;
 entry:
-  %0 = load i8, i8* %data, align 1
+  %0 = load i8, ptr %data, align 1
   %conv = zext i8 %0 to i32
-  %arrayidx1 = getelementptr inbounds i8, i8* %data, i64 1
-  %1 = load i8, i8* %arrayidx1, align 1
+  %arrayidx1 = getelementptr inbounds i8, ptr %data, i64 1
+  %1 = load i8, ptr %arrayidx1, align 1
   %conv2 = zext i8 %1 to i32
   %shl3 = shl nuw nsw i32 %conv2, 8
   %or = or i32 %shl3, %conv
-  %arrayidx4 = getelementptr inbounds i8, i8* %data, i64 2
-  %2 = load i8, i8* %arrayidx4, align 1
+  %arrayidx4 = getelementptr inbounds i8, ptr %data, i64 2
+  %2 = load i8, ptr %arrayidx4, align 1
   %conv5 = zext i8 %2 to i32
   %shl6 = shl nuw nsw i32 %conv5, 16
   %or7 = or i32 %or, %shl6
-  %arrayidx8 = getelementptr inbounds i8, i8* %data, i64 3
-  %3 = load i8, i8* %arrayidx8, align 1
+  %arrayidx8 = getelementptr inbounds i8, ptr %data, i64 3
+  %3 = load i8, ptr %arrayidx8, align 1
   %conv9 = zext i8 %3 to i32
   %shl10 = shl nuw i32 %conv9, 24
   %or11 = or i32 %or7, %shl10
   ret i32 %or11
 }
 
-define <4 x float> @PR16739_byref(<4 x float>* nocapture readonly dereferenceable(16) %x) {
+define <4 x float> @PR16739_byref(ptr nocapture readonly dereferenceable(16) %x) {
 ; CHECK-LABEL: @PR16739_byref(
-; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X:%.*]], i64 0, i64 0
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 2
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[GEP0]] to <2 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
-; CHECK-NEXT:    [[X2:%.*]] = load float, float* [[GEP2]], align 4
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds <4 x float>, ptr [[X:%.*]], i64 0, i64 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[X]], align 4
+; CHECK-NEXT:    [[X2:%.*]] = load float, ptr [[GEP2]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[I2:%.*]] = insertelement <4 x float> [[TMP3]], float [[X2]], i32 2
 ; CHECK-NEXT:    [[I3:%.*]] = insertelement <4 x float> [[I2]], float [[X2]], i32 3
 ; CHECK-NEXT:    ret <4 x float> [[I3]]
 ;
-  %gep0 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 0
-  %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 1
-  %gep2 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 2
-  %x0 = load float, float* %gep0
-  %x1 = load float, float* %gep1
-  %x2 = load float, float* %gep2
+  %gep1 = getelementptr inbounds <4 x float>, ptr %x, i64 0, i64 1
+  %gep2 = getelementptr inbounds <4 x float>, ptr %x, i64 0, i64 2
+  %x0 = load float, ptr %x
+  %x1 = load float, ptr %gep1
+  %x2 = load float, ptr %gep2
   %i0 = insertelement <4 x float> undef, float %x0, i32 0
   %i1 = insertelement <4 x float> %i0, float %x1, i32 1
   %i2 = insertelement <4 x float> %i1, float %x2, i32 2
@@ -74,18 +71,15 @@ define <4 x float> @PR16739_byref(<4 x float>* nocapture readonly dereferenceabl
   ret <4 x float> %i3
 }
 
-define <4 x float> @PR16739_byref_alt(<4 x float>* nocapture readonly dereferenceable(16) %x) {
+define <4 x float> @PR16739_byref_alt(ptr nocapture readonly dereferenceable(16) %x) {
 ; CHECK-LABEL: @PR16739_byref_alt(
-; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X:%.*]], i64 0, i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[GEP0]] to <2 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[X:%.*]], align 4
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
 ; CHECK-NEXT:    ret <4 x float> [[SHUFFLE]]
 ;
-  %gep0 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 0
-  %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 1
-  %x0 = load float, float* %gep0
-  %x1 = load float, float* %gep1
+  %gep1 = getelementptr inbounds <4 x float>, ptr %x, i64 0, i64 1
+  %x0 = load float, ptr %x
+  %x1 = load float, ptr %gep1
   %i0 = insertelement <4 x float> undef, float %x0, i32 0
   %i1 = insertelement <4 x float> %i0, float %x0, i32 1
   %i2 = insertelement <4 x float> %i1, float %x1, i32 2
@@ -93,13 +87,11 @@ define <4 x float> @PR16739_byref_alt(<4 x float>* nocapture readonly dereferenc
   ret <4 x float> %i3
 }
 
-define <4 x float> @PR16739_byval(<4 x float>* nocapture readonly dereferenceable(16) %x) {
+define <4 x float> @PR16739_byval(ptr nocapture readonly dereferenceable(16) %x) {
 ; CHECK-LABEL: @PR16739_byval(
-; CHECK-NEXT:    [[T0:%.*]] = bitcast <4 x float>* [[X:%.*]] to i64*
-; CHECK-NEXT:    [[T1:%.*]] = load i64, i64* [[T0]], align 16
-; CHECK-NEXT:    [[T2:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 2
-; CHECK-NEXT:    [[T3:%.*]] = bitcast float* [[T2]] to i64*
-; CHECK-NEXT:    [[T4:%.*]] = load i64, i64* [[T3]], align 8
+; CHECK-NEXT:    [[T1:%.*]] = load i64, ptr [[X:%.*]], align 16
+; CHECK-NEXT:    [[T2:%.*]] = getelementptr inbounds <4 x float>, ptr [[X]], i64 0, i64 2
+; CHECK-NEXT:    [[T4:%.*]] = load i64, ptr [[T2]], align 8
 ; CHECK-NEXT:    [[T5:%.*]] = trunc i64 [[T1]] to i32
 ; CHECK-NEXT:    [[T6:%.*]] = bitcast i32 [[T5]] to float
 ; CHECK-NEXT:    [[T7:%.*]] = insertelement <4 x float> undef, float [[T6]], i32 0
@@ -113,11 +105,9 @@ define <4 x float> @PR16739_byval(<4 x float>* nocapture readonly dereferenceabl
 ; CHECK-NEXT:    [[T15:%.*]] = insertelement <4 x float> [[T14]], float [[T13]], i32 3
 ; CHECK-NEXT:    ret <4 x float> [[T15]]
 ;
-  %t0 = bitcast <4 x float>* %x to i64*
-  %t1 = load i64, i64* %t0, align 16
-  %t2 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 2
-  %t3 = bitcast float* %t2 to i64*
-  %t4 = load i64, i64* %t3, align 8
+  %t1 = load i64, ptr %x, align 16
+  %t2 = getelementptr inbounds <4 x float>, ptr %x, i64 0, i64 2
+  %t4 = load i64, ptr %t2, align 8
   %t5 = trunc i64 %t1 to i32
   %t6 = bitcast i32 %t5 to float
   %t7 = insertelement <4 x float> undef, float %t6, i32 0
@@ -132,61 +122,53 @@ define <4 x float> @PR16739_byval(<4 x float>* nocapture readonly dereferenceabl
   ret <4 x float> %t15
 }
 
-define void @PR43578_prefer128(i32* %r, i64* %p, i64* %q) #0 {
+define void @PR43578_prefer128(ptr %r, ptr %p, ptr %q) #0 {
 ; CHECK-LABEL: @PR43578_prefer128(
-; CHECK-NEXT:    [[P0:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 0
-; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 2
-; CHECK-NEXT:    [[Q0:%.*]] = getelementptr inbounds i64, i64* [[Q:%.*]], i64 0
-; CHECK-NEXT:    [[Q2:%.*]] = getelementptr inbounds i64, i64* [[Q]], i64 2
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[P0]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 2
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[Q0]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 2
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 2
+; CHECK-NEXT:    [[Q2:%.*]] = getelementptr inbounds i64, ptr [[Q:%.*]], i64 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[P]], align 2
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr [[Q]], align 2
 ; CHECK-NEXT:    [[TMP5:%.*]] = sub nsw <2 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[P2]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[TMP6]], align 2
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64* [[Q2]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[TMP8]], align 2
+; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr [[P2]], align 2
+; CHECK-NEXT:    [[TMP9:%.*]] = load <2 x i64>, ptr [[Q2]], align 2
 ; CHECK-NEXT:    [[TMP10:%.*]] = sub nsw <2 x i64> [[TMP7]], [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
-; CHECK-NEXT:    [[G0:%.*]] = getelementptr inbounds i32, i32* [[R:%.*]], i64 [[TMP11]]
+; CHECK-NEXT:    [[G0:%.*]] = getelementptr inbounds i32, ptr [[R:%.*]], i64 [[TMP11]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
-; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds i32, i32* [[R]], i64 [[TMP12]]
+; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP12]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0
-; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds i32, i32* [[R]], i64 [[TMP13]]
+; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP13]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1
-; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds i32, i32* [[R]], i64 [[TMP14]]
+; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP14]]
 ; CHECK-NEXT:    ret void
 ;
-  %p0 = getelementptr inbounds i64, i64* %p, i64 0
-  %p1 = getelementptr inbounds i64, i64* %p, i64 1
-  %p2 = getelementptr inbounds i64, i64* %p, i64 2
-  %p3 = getelementptr inbounds i64, i64* %p, i64 3
+  %p1 = getelementptr inbounds i64, ptr %p, i64 1
+  %p2 = getelementptr inbounds i64, ptr %p, i64 2
+  %p3 = getelementptr inbounds i64, ptr %p, i64 3
 
-  %q0 = getelementptr inbounds i64, i64* %q, i64 0
-  %q1 = getelementptr inbounds i64, i64* %q, i64 1
-  %q2 = getelementptr inbounds i64, i64* %q, i64 2
-  %q3 = getelementptr inbounds i64, i64* %q, i64 3
+  %q1 = getelementptr inbounds i64, ptr %q, i64 1
+  %q2 = getelementptr inbounds i64, ptr %q, i64 2
+  %q3 = getelementptr inbounds i64, ptr %q, i64 3
 
-  %x0 = load i64, i64* %p0, align 2
-  %x1 = load i64, i64* %p1, align 2
-  %x2 = load i64, i64* %p2, align 2
-  %x3 = load i64, i64* %p3, align 2
+  %x0 = load i64, ptr %p, align 2
+  %x1 = load i64, ptr %p1, align 2
+  %x2 = load i64, ptr %p2, align 2
+  %x3 = load i64, ptr %p3, align 2
 
-  %y0 = load i64, i64* %q0, align 2
-  %y1 = load i64, i64* %q1, align 2
-  %y2 = load i64, i64* %q2, align 2
-  %y3 = load i64, i64* %q3, align 2
+  %y0 = load i64, ptr %q, align 2
+  %y1 = load i64, ptr %q1, align 2
+  %y2 = load i64, ptr %q2, align 2
+  %y3 = load i64, ptr %q3, align 2
 
   %sub0 = sub nsw i64 %x0, %y0
   %sub1 = sub nsw i64 %x1, %y1
   %sub2 = sub nsw i64 %x2, %y2
   %sub3 = sub nsw i64 %x3, %y3
 
-  %g0 = getelementptr inbounds i32, i32* %r, i64 %sub0
-  %g1 = getelementptr inbounds i32, i32* %r, i64 %sub1
-  %g2 = getelementptr inbounds i32, i32* %r, i64 %sub2
-  %g3 = getelementptr inbounds i32, i32* %r, i64 %sub3
+  %g0 = getelementptr inbounds i32, ptr %r, i64 %sub0
+  %g1 = getelementptr inbounds i32, ptr %r, i64 %sub1
+  %g2 = getelementptr inbounds i32, ptr %r, i64 %sub2
+  %g3 = getelementptr inbounds i32, ptr %r, i64 %sub3
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll b/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll
index 2bcac66b12fb4..252da8d7fa7a7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll
@@ -6,11 +6,10 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 ; At this point we can't vectorize only parts of the tree.
 
-define i32 @test(double* nocapture %A, i8* nocapture %B) {
+define i32 @test(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[B:%.*]] to <2 x i8>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr [[B:%.*]], align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i8> [[TMP1]], <i8 3, i8 3>
 ; CHECK-NEXT:    [[TMP3:%.*]] = sitofp <2 x i8> [[TMP2]] to <2 x double>
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], [[TMP3]]
@@ -23,14 +22,13 @@ define i32 @test(double* nocapture %A, i8* nocapture %B) {
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], <double 1.000000e+00, double 1.000000e+00>
 ; CHECK-NEXT:    [[TMP12:%.*]] = fmul <2 x double> [[TMP11]], [[TMP11]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = fadd <2 x double> [[TMP12]], <double 1.000000e+00, double 1.000000e+00>
-; CHECK-NEXT:    [[TMP14:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP13]], <2 x double>* [[TMP14]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP13]], ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    ret i32 undef
 ;
 entry:
-  %0 = load i8, i8* %B, align 1
-  %arrayidx1 = getelementptr inbounds i8, i8* %B, i64 1
-  %1 = load i8, i8* %arrayidx1, align 1
+  %0 = load i8, ptr %B, align 1
+  %arrayidx1 = getelementptr inbounds i8, ptr %B, i64 1
+  %1 = load i8, ptr %arrayidx1, align 1
   %add = add i8 %0, 3
   %add4 = add i8 %1, 3
   %conv6 = sitofp i8 %add to double
@@ -55,8 +53,8 @@ entry:
   %add24 = fadd double %mul23, 1.000000e+00
   %mul25 = fmul double %add22, %add22
   %add26 = fadd double %mul25, 1.000000e+00
-  store double %add24, double* %A, align 8
-  %arrayidx28 = getelementptr inbounds double, double* %A, i64 1
-  store double %add26, double* %arrayidx28, align 8
+  store double %add24, ptr %A, align 8
+  %arrayidx28 = getelementptr inbounds double, ptr %A, i64 1
+  store double %add26, ptr %arrayidx28, align 8
   ret i32 undef
 }

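The other recurring cleanup, visible in PR43578_prefer128 above and throughout the lookahead tests below, is dropping getelementptrs whose indices are all zero: such a GEP computes exactly its base address, so the converted tests use the base pointer directly. A sketch with hypothetical names:

    ; Before: a zero-index GEP that merely re-derives the base address.
    ; (hypothetical functions, for illustration only)
    define i64 @sketch_before(ptr %p) {
      %p0 = getelementptr inbounds i64, ptr %p, i64 0   ; same address as %p
      %x0 = load i64, ptr %p0, align 2
      ret i64 %x0
    }

    ; After: the redundant GEP is folded away.
    define i64 @sketch_after(ptr %p) {
      %x0 = load i64, ptr %p, align 2
      ret i64 %x0
    }

The same folding is what turns the constant expression getelementptr inbounds ([4 x i32], ptr @a, i64 0, i64 0) into a bare @a in the stores near the top of this mail.
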
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
index 168ffb62cdcc0..1ed2841a770db 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
@@ -17,46 +17,39 @@
 ;          |                    |
 ;         S[0]                 S[1]
 ;
-define void @lookahead_basic(double* %array) {
+define void @lookahead_basic(ptr %array) {
 ; CHECK-LABEL: @lookahead_basic(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
-; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 2
-; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 4
-; CHECK-NEXT:    [[IDX6:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 6
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[IDX2]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[IDX4]] to <2 x double>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[IDX6]] to <2 x double>*
-; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, ptr [[ARRAY:%.*]], i64 2
+; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds double, ptr [[ARRAY]], i64 4
+; CHECK-NEXT:    [[IDX6:%.*]] = getelementptr inbounds double, ptr [[ARRAY]], i64 6
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[IDX2]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr [[IDX4]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x double>, ptr [[IDX6]], align 8
 ; CHECK-NEXT:    [[TMP8:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = fsub fast <2 x double> [[TMP5]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = fadd fast <2 x double> [[TMP9]], [[TMP8]]
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP10]], ptr [[ARRAY]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %idx0 = getelementptr inbounds double, double* %array, i64 0
-  %idx1 = getelementptr inbounds double, double* %array, i64 1
-  %idx2 = getelementptr inbounds double, double* %array, i64 2
-  %idx3 = getelementptr inbounds double, double* %array, i64 3
-  %idx4 = getelementptr inbounds double, double* %array, i64 4
-  %idx5 = getelementptr inbounds double, double* %array, i64 5
-  %idx6 = getelementptr inbounds double, double* %array, i64 6
-  %idx7 = getelementptr inbounds double, double* %array, i64 7
-
-  %A_0 = load double, double *%idx0, align 8
-  %A_1 = load double, double *%idx1, align 8
-  %B_0 = load double, double *%idx2, align 8
-  %B_1 = load double, double *%idx3, align 8
-  %C_0 = load double, double *%idx4, align 8
-  %C_1 = load double, double *%idx5, align 8
-  %D_0 = load double, double *%idx6, align 8
-  %D_1 = load double, double *%idx7, align 8
+  %idx1 = getelementptr inbounds double, ptr %array, i64 1
+  %idx2 = getelementptr inbounds double, ptr %array, i64 2
+  %idx3 = getelementptr inbounds double, ptr %array, i64 3
+  %idx4 = getelementptr inbounds double, ptr %array, i64 4
+  %idx5 = getelementptr inbounds double, ptr %array, i64 5
+  %idx6 = getelementptr inbounds double, ptr %array, i64 6
+  %idx7 = getelementptr inbounds double, ptr %array, i64 7
+
+  %A_0 = load double, ptr %array, align 8
+  %A_1 = load double, ptr %idx1, align 8
+  %B_0 = load double, ptr %idx2, align 8
+  %B_1 = load double, ptr %idx3, align 8
+  %C_0 = load double, ptr %idx4, align 8
+  %C_1 = load double, ptr %idx5, align 8
+  %D_0 = load double, ptr %idx6, align 8
+  %D_1 = load double, ptr %idx7, align 8
 
   %subAB_0 = fsub fast double %A_0, %B_0
   %subCD_0 = fsub fast double %C_0, %D_0
@@ -67,8 +60,8 @@ entry:
   %addABCD_0 = fadd fast double %subAB_0, %subCD_0
   %addCDAB_1 = fadd fast double %subCD_1, %subAB_1
 
-  store double %addABCD_0, double *%idx0, align 8
-  store double %addCDAB_1, double *%idx1, align 8
+  store double %addABCD_0, ptr %array, align 8
+  store double %addCDAB_1, ptr %idx1, align 8
   ret void
 }
 
@@ -84,40 +77,35 @@ entry:
 ;          |                    |
 ;         S[0]                 S[1]
 ;
-define void @lookahead_alt1(double* %array) {
+define void @lookahead_alt1(ptr %array) {
 ; CHECK-LABEL: @lookahead_alt1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
-; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 2
-; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 4
-; CHECK-NEXT:    [[IDX5:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 5
-; CHECK-NEXT:    [[IDX6:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 6
-; CHECK-NEXT:    [[IDX7:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 7
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[IDX2]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, ptr [[ARRAY:%.*]], i64 2
+; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds double, ptr [[ARRAY]], i64 4
+; CHECK-NEXT:    [[IDX5:%.*]] = getelementptr inbounds double, ptr [[ARRAY]], i64 5
+; CHECK-NEXT:    [[IDX6:%.*]] = getelementptr inbounds double, ptr [[ARRAY]], i64 6
+; CHECK-NEXT:    [[IDX7:%.*]] = getelementptr inbounds double, ptr [[ARRAY]], i64 7
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[IDX2]], align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = fadd fast <2 x double> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = fadd fast <2 x double> [[TMP5]], [[TMP4]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP6]], ptr [[ARRAY]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %idx0 = getelementptr inbounds double, double* %array, i64 0
-  %idx1 = getelementptr inbounds double, double* %array, i64 1
-  %idx2 = getelementptr inbounds double, double* %array, i64 2
-  %idx3 = getelementptr inbounds double, double* %array, i64 3
-  %idx4 = getelementptr inbounds double, double* %array, i64 4
-  %idx5 = getelementptr inbounds double, double* %array, i64 5
-  %idx6 = getelementptr inbounds double, double* %array, i64 6
-  %idx7 = getelementptr inbounds double, double* %array, i64 7
-
-  %A_0 = load double, double *%idx0, align 8
-  %A_1 = load double, double *%idx1, align 8
-  %B_0 = load double, double *%idx2, align 8
-  %B_1 = load double, double *%idx3, align 8
+  %idx1 = getelementptr inbounds double, ptr %array, i64 1
+  %idx2 = getelementptr inbounds double, ptr %array, i64 2
+  %idx3 = getelementptr inbounds double, ptr %array, i64 3
+  %idx4 = getelementptr inbounds double, ptr %array, i64 4
+  %idx5 = getelementptr inbounds double, ptr %array, i64 5
+  %idx6 = getelementptr inbounds double, ptr %array, i64 6
+  %idx7 = getelementptr inbounds double, ptr %array, i64 7
+
+  %A_0 = load double, ptr %array, align 8
+  %A_1 = load double, ptr %idx1, align 8
+  %B_0 = load double, ptr %idx2, align 8
+  %B_1 = load double, ptr %idx3, align 8
 
   %addAB_0_L = fadd fast double %A_0, %B_0
   %subAB_0_R = fsub fast double %A_0, %B_0
@@ -128,8 +116,8 @@ entry:
   %addABCD_0 = fadd fast double %addAB_0_L, %subAB_0_R
   %addCDAB_1 = fadd fast double %subAB_1_L, %addAB_1_R
 
-  store double %addABCD_0, double *%idx0, align 8
-  store double %addCDAB_1, double *%idx1, align 8
+  store double %addABCD_0, ptr %array, align 8
+  store double %addCDAB_1, ptr %idx1, align 8
   ret void
 }
 
@@ -145,21 +133,16 @@ entry:
 ;          |                    |
 ;         S[0]                 S[1]
 ;
-define void @lookahead_alt2(double* %array) {
+define void @lookahead_alt2(ptr %array) {
 ; CHECK-LABEL: @lookahead_alt2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
-; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 2
-; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 4
-; CHECK-NEXT:    [[IDX6:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 6
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[IDX2]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[IDX4]] to <2 x double>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[IDX6]] to <2 x double>*
-; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, ptr [[ARRAY:%.*]], i64 2
+; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds double, ptr [[ARRAY]], i64 4
+; CHECK-NEXT:    [[IDX6:%.*]] = getelementptr inbounds double, ptr [[ARRAY]], i64 6
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[IDX2]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr [[IDX4]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x double>, ptr [[IDX6]], align 8
 ; CHECK-NEXT:    [[TMP8:%.*]] = fsub fast <2 x double> [[TMP5]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = fadd fast <2 x double> [[TMP5]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x i32> <i32 0, i32 3>
@@ -167,28 +150,26 @@ define void @lookahead_alt2(double* %array) {
 ; CHECK-NEXT:    [[TMP12:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    [[TMP14:%.*]] = fadd fast <2 x double> [[TMP10]], [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP14]], <2 x double>* [[TMP15]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP14]], ptr [[ARRAY]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %idx0 = getelementptr inbounds double, double* %array, i64 0
-  %idx1 = getelementptr inbounds double, double* %array, i64 1
-  %idx2 = getelementptr inbounds double, double* %array, i64 2
-  %idx3 = getelementptr inbounds double, double* %array, i64 3
-  %idx4 = getelementptr inbounds double, double* %array, i64 4
-  %idx5 = getelementptr inbounds double, double* %array, i64 5
-  %idx6 = getelementptr inbounds double, double* %array, i64 6
-  %idx7 = getelementptr inbounds double, double* %array, i64 7
-
-  %A_0 = load double, double *%idx0, align 8
-  %A_1 = load double, double *%idx1, align 8
-  %B_0 = load double, double *%idx2, align 8
-  %B_1 = load double, double *%idx3, align 8
-  %C_0 = load double, double *%idx4, align 8
-  %C_1 = load double, double *%idx5, align 8
-  %D_0 = load double, double *%idx6, align 8
-  %D_1 = load double, double *%idx7, align 8
+  %idx1 = getelementptr inbounds double, ptr %array, i64 1
+  %idx2 = getelementptr inbounds double, ptr %array, i64 2
+  %idx3 = getelementptr inbounds double, ptr %array, i64 3
+  %idx4 = getelementptr inbounds double, ptr %array, i64 4
+  %idx5 = getelementptr inbounds double, ptr %array, i64 5
+  %idx6 = getelementptr inbounds double, ptr %array, i64 6
+  %idx7 = getelementptr inbounds double, ptr %array, i64 7
+
+  %A_0 = load double, ptr %array, align 8
+  %A_1 = load double, ptr %idx1, align 8
+  %B_0 = load double, ptr %idx2, align 8
+  %B_1 = load double, ptr %idx3, align 8
+  %C_0 = load double, ptr %idx4, align 8
+  %C_1 = load double, ptr %idx5, align 8
+  %D_0 = load double, ptr %idx6, align 8
+  %D_1 = load double, ptr %idx7, align 8
 
   %addAB_0 = fadd fast double %A_0, %B_0
   %subCD_0 = fsub fast double %C_0, %D_0
@@ -199,8 +180,8 @@ entry:
   %addABCD_0 = fadd fast double %addAB_0, %subCD_0
   %addCDAB_1 = fadd fast double %addCD_1, %subAB_1
 
-  store double %addABCD_0, double *%idx0, align 8
-  store double %addCDAB_1, double *%idx1, align 8
+  store double %addABCD_0, ptr %array, align 8
+  store double %addCDAB_1, ptr %idx1, align 8
   ret void
 }
 
@@ -217,25 +198,19 @@ entry:
 ; SLP should reorder the operands of the RHS add taking into consideration the cost of external uses.
 ; It is more profitable to reorder the operands of the RHS add, because A[1] has an external use.
 
-define void @lookahead_external_uses(double* %A, double *%B, double *%C, double *%D, double *%S, double *%Ext1, double *%Ext2) {
+define void @lookahead_external_uses(ptr %A, ptr %B, ptr %C, ptr %D, ptr %S, ptr %Ext1, ptr %Ext2) {
 ; CHECK-LABEL: @lookahead_external_uses(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
-; CHECK-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 0
-; CHECK-NEXT:    [[IDXC0:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 0
-; CHECK-NEXT:    [[IDXD0:%.*]] = getelementptr inbounds double, double* [[D:%.*]], i64 0
-; CHECK-NEXT:    [[IDXB2:%.*]] = getelementptr inbounds double, double* [[B]], i64 2
-; CHECK-NEXT:    [[IDXA2:%.*]] = getelementptr inbounds double, double* [[A]], i64 2
-; CHECK-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
-; CHECK-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0
-; CHECK-NEXT:    [[B0:%.*]] = load double, double* [[IDXB0]], align 8
-; CHECK-NEXT:    [[C0:%.*]] = load double, double* [[IDXC0]], align 8
-; CHECK-NEXT:    [[D0:%.*]] = load double, double* [[IDXD0]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDXA0]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; CHECK-NEXT:    [[B2:%.*]] = load double, double* [[IDXB2]], align 8
-; CHECK-NEXT:    [[A2:%.*]] = load double, double* [[IDXA2]], align 8
-; CHECK-NEXT:    [[B1:%.*]] = load double, double* [[IDXB1]], align 8
+; CHECK-NEXT:    [[IDXB2:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 2
+; CHECK-NEXT:    [[IDXA2:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 2
+; CHECK-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, ptr [[B]], i64 1
+; CHECK-NEXT:    [[B0:%.*]] = load double, ptr [[B]], align 8
+; CHECK-NEXT:    [[C0:%.*]] = load double, ptr [[C:%.*]], align 8
+; CHECK-NEXT:    [[D0:%.*]] = load double, ptr [[D:%.*]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[A]], align 8
+; CHECK-NEXT:    [[B2:%.*]] = load double, ptr [[IDXB2]], align 8
+; CHECK-NEXT:    [[A2:%.*]] = load double, ptr [[IDXA2]], align 8
+; CHECK-NEXT:    [[B1:%.*]] = load double, ptr [[IDXB1]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B2]], i32 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]]
@@ -245,32 +220,27 @@ define void @lookahead_external_uses(double* %A, double *%B, double *%C, double
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[B1]], i32 1
 ; CHECK-NEXT:    [[TMP9:%.*]] = fsub fast <2 x double> [[TMP6]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP10]], ptr [[S:%.*]], align 8
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
-; CHECK-NEXT:    store double [[TMP12]], double* [[EXT1:%.*]], align 8
+; CHECK-NEXT:    store double [[TMP12]], ptr [[EXT1:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %IdxA0 = getelementptr inbounds double, double* %A, i64 0
-  %IdxB0 = getelementptr inbounds double, double* %B, i64 0
-  %IdxC0 = getelementptr inbounds double, double* %C, i64 0
-  %IdxD0 = getelementptr inbounds double, double* %D, i64 0
-
-  %IdxA1 = getelementptr inbounds double, double* %A, i64 1
-  %IdxB2 = getelementptr inbounds double, double* %B, i64 2
-  %IdxA2 = getelementptr inbounds double, double* %A, i64 2
-  %IdxB1 = getelementptr inbounds double, double* %B, i64 1
-
-  %A0 = load double, double *%IdxA0, align 8
-  %B0 = load double, double *%IdxB0, align 8
-  %C0 = load double, double *%IdxC0, align 8
-  %D0 = load double, double *%IdxD0, align 8
-
-  %A1 = load double, double *%IdxA1, align 8
-  %B2 = load double, double *%IdxB2, align 8
-  %A2 = load double, double *%IdxA2, align 8
-  %B1 = load double, double *%IdxB1, align 8
+
+  %IdxA1 = getelementptr inbounds double, ptr %A, i64 1
+  %IdxB2 = getelementptr inbounds double, ptr %B, i64 2
+  %IdxA2 = getelementptr inbounds double, ptr %A, i64 2
+  %IdxB1 = getelementptr inbounds double, ptr %B, i64 1
+
+  %A0 = load double, ptr %A, align 8
+  %B0 = load double, ptr %B, align 8
+  %C0 = load double, ptr %C, align 8
+  %D0 = load double, ptr %D, align 8
+
+  %A1 = load double, ptr %IdxA1, align 8
+  %B2 = load double, ptr %IdxB2, align 8
+  %A2 = load double, ptr %IdxA2, align 8
+  %B1 = load double, ptr %IdxB1, align 8
 
   %subA0B0 = fsub fast double %A0, %B0
   %subC0D0 = fsub fast double %C0, %D0
@@ -281,14 +251,13 @@ entry:
   %add0 = fadd fast double %subA0B0, %subC0D0
   %add1 = fadd fast double %subA1B2, %subA2B1
 
-  %IdxS0 = getelementptr inbounds double, double* %S, i64 0
-  %IdxS1 = getelementptr inbounds double, double* %S, i64 1
+  %IdxS1 = getelementptr inbounds double, ptr %S, i64 1
 
-  store double %add0, double *%IdxS0, align 8
-  store double %add1, double *%IdxS1, align 8
+  store double %add0, ptr %S, align 8
+  store double %add1, ptr %IdxS1, align 8
 
   ; External use
-  store double %A1, double *%Ext1, align 8
+  store double %A1, ptr %Ext1, align 8
   ret void
 }
 
@@ -307,25 +276,19 @@ entry:
 ; The result is that the operands of the Add are not reordered and the loads
 ; from A get vectorized instead of the loads from B.
 ;
-define void @lookahead_limit_users_budget(double* %A, double *%B, double *%C, double *%D, double *%S, double *%Ext1, double *%Ext2, double *%Ext3, double *%Ext4, double *%Ext5) {
+define void @lookahead_limit_users_budget(ptr %A, ptr %B, ptr %C, ptr %D, ptr %S, ptr %Ext1, ptr %Ext2, ptr %Ext3, ptr %Ext4, ptr %Ext5) {
 ; CHECK-LABEL: @lookahead_limit_users_budget(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
-; CHECK-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 0
-; CHECK-NEXT:    [[IDXC0:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 0
-; CHECK-NEXT:    [[IDXD0:%.*]] = getelementptr inbounds double, double* [[D:%.*]], i64 0
-; CHECK-NEXT:    [[IDXB2:%.*]] = getelementptr inbounds double, double* [[B]], i64 2
-; CHECK-NEXT:    [[IDXA2:%.*]] = getelementptr inbounds double, double* [[A]], i64 2
-; CHECK-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
-; CHECK-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0
-; CHECK-NEXT:    [[B0:%.*]] = load double, double* [[IDXB0]], align 8
-; CHECK-NEXT:    [[C0:%.*]] = load double, double* [[IDXC0]], align 8
-; CHECK-NEXT:    [[D0:%.*]] = load double, double* [[IDXD0]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDXA0]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; CHECK-NEXT:    [[B2:%.*]] = load double, double* [[IDXB2]], align 8
-; CHECK-NEXT:    [[A2:%.*]] = load double, double* [[IDXA2]], align 8
-; CHECK-NEXT:    [[B1:%.*]] = load double, double* [[IDXB1]], align 8
+; CHECK-NEXT:    [[IDXB2:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 2
+; CHECK-NEXT:    [[IDXA2:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 2
+; CHECK-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, ptr [[B]], i64 1
+; CHECK-NEXT:    [[B0:%.*]] = load double, ptr [[B]], align 8
+; CHECK-NEXT:    [[C0:%.*]] = load double, ptr [[C:%.*]], align 8
+; CHECK-NEXT:    [[D0:%.*]] = load double, ptr [[D:%.*]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[A]], align 8
+; CHECK-NEXT:    [[B2:%.*]] = load double, ptr [[IDXB2]], align 8
+; CHECK-NEXT:    [[A2:%.*]] = load double, ptr [[IDXA2]], align 8
+; CHECK-NEXT:    [[B1:%.*]] = load double, ptr [[IDXB1]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B2]], i32 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]]
@@ -335,36 +298,31 @@ define void @lookahead_limit_users_budget(double* %A, double *%B, double *%C, do
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[B1]], i32 1
 ; CHECK-NEXT:    [[TMP9:%.*]] = fsub fast <2 x double> [[TMP6]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP10]], ptr [[S:%.*]], align 8
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
-; CHECK-NEXT:    store double [[TMP12]], double* [[EXT1:%.*]], align 8
-; CHECK-NEXT:    store double [[TMP12]], double* [[EXT2:%.*]], align 8
-; CHECK-NEXT:    store double [[TMP12]], double* [[EXT3:%.*]], align 8
-; CHECK-NEXT:    store double [[B1]], double* [[EXT4:%.*]], align 8
-; CHECK-NEXT:    store double [[B1]], double* [[EXT5:%.*]], align 8
+; CHECK-NEXT:    store double [[TMP12]], ptr [[EXT1:%.*]], align 8
+; CHECK-NEXT:    store double [[TMP12]], ptr [[EXT2:%.*]], align 8
+; CHECK-NEXT:    store double [[TMP12]], ptr [[EXT3:%.*]], align 8
+; CHECK-NEXT:    store double [[B1]], ptr [[EXT4:%.*]], align 8
+; CHECK-NEXT:    store double [[B1]], ptr [[EXT5:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %IdxA0 = getelementptr inbounds double, double* %A, i64 0
-  %IdxB0 = getelementptr inbounds double, double* %B, i64 0
-  %IdxC0 = getelementptr inbounds double, double* %C, i64 0
-  %IdxD0 = getelementptr inbounds double, double* %D, i64 0
-
-  %IdxA1 = getelementptr inbounds double, double* %A, i64 1
-  %IdxB2 = getelementptr inbounds double, double* %B, i64 2
-  %IdxA2 = getelementptr inbounds double, double* %A, i64 2
-  %IdxB1 = getelementptr inbounds double, double* %B, i64 1
-
-  %A0 = load double, double *%IdxA0, align 8
-  %B0 = load double, double *%IdxB0, align 8
-  %C0 = load double, double *%IdxC0, align 8
-  %D0 = load double, double *%IdxD0, align 8
-
-  %A1 = load double, double *%IdxA1, align 8
-  %B2 = load double, double *%IdxB2, align 8
-  %A2 = load double, double *%IdxA2, align 8
-  %B1 = load double, double *%IdxB1, align 8
+
+  %IdxA1 = getelementptr inbounds double, ptr %A, i64 1
+  %IdxB2 = getelementptr inbounds double, ptr %B, i64 2
+  %IdxA2 = getelementptr inbounds double, ptr %A, i64 2
+  %IdxB1 = getelementptr inbounds double, ptr %B, i64 1
+
+  %A0 = load double, ptr %A, align 8
+  %B0 = load double, ptr %B, align 8
+  %C0 = load double, ptr %C, align 8
+  %D0 = load double, ptr %D, align 8
+
+  %A1 = load double, ptr %IdxA1, align 8
+  %B2 = load double, ptr %IdxB2, align 8
+  %A2 = load double, ptr %IdxA2, align 8
+  %B1 = load double, ptr %IdxB1, align 8
 
   %subA0B0 = fsub fast double %A0, %B0
   %subC0D0 = fsub fast double %C0, %D0
@@ -375,20 +333,19 @@ entry:
   %add0 = fadd fast double %subA0B0, %subC0D0
   %add1 = fadd fast double %subA1B2, %subA2B1
 
-  %IdxS0 = getelementptr inbounds double, double* %S, i64 0
-  %IdxS1 = getelementptr inbounds double, double* %S, i64 1
+  %IdxS1 = getelementptr inbounds double, ptr %S, i64 1
 
-  store double %add0, double *%IdxS0, align 8
-  store double %add1, double *%IdxS1, align 8
+  store double %add0, ptr %S, align 8
+  store double %add1, ptr %IdxS1, align 8
 
   ; External uses of A1
-  store double %A1, double *%Ext1, align 8
-  store double %A1, double *%Ext2, align 8
-  store double %A1, double *%Ext3, align 8
+  store double %A1, ptr %Ext1, align 8
+  store double %A1, ptr %Ext2, align 8
+  store double %A1, ptr %Ext3, align 8
 
   ; External uses of B1
-  store double %B1, double *%Ext4, align 8
-  store double %B1, double *%Ext5, align 8
+  store double %B1, ptr %Ext4, align 8
+  store double %B1, ptr %Ext5, align 8
 
   ret void
 }
@@ -396,53 +353,45 @@ entry:
 ; This checks that the lookahead code does not crash when instructions with the same opcodes have different numbers of operands (in this case the calls).
 
 %Class = type { i8 }
-declare double @_ZN1i2ayEv(%Class*)
+declare double @_ZN1i2ayEv(ptr)
 declare double @_ZN1i2axEv()
 
-define void @lookahead_crash(double* %A, double *%S, %Class *%Arg0) {
+define void @lookahead_crash(ptr %A, ptr %S, ptr %Arg0) {
 ; CHECK-LABEL: @lookahead_crash(
-; CHECK-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
-; CHECK-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[IDXA0]] to <2 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
-; CHECK-NEXT:    [[C0:%.*]] = call double @_ZN1i2ayEv(%Class* [[ARG0:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[C0:%.*]] = call double @_ZN1i2ayEv(ptr [[ARG0:%.*]])
 ; CHECK-NEXT:    [[C1:%.*]] = call double @_ZN1i2axEv()
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[C1]], i32 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = fadd fast <2 x double> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[S:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %IdxA0 = getelementptr inbounds double, double* %A, i64 0
-  %IdxA1 = getelementptr inbounds double, double* %A, i64 1
+  %IdxA1 = getelementptr inbounds double, ptr %A, i64 1
 
-  %A0 = load double, double *%IdxA0, align 8
-  %A1 = load double, double *%IdxA1, align 8
+  %A0 = load double, ptr %A, align 8
+  %A1 = load double, ptr %IdxA1, align 8
 
-  %C0 = call double @_ZN1i2ayEv(%Class *%Arg0)
+  %C0 = call double @_ZN1i2ayEv(ptr %Arg0)
   %C1 = call double @_ZN1i2axEv()
 
   %add0 = fadd fast double %A0, %C0
   %add1 = fadd fast double %A1, %C1
 
-  %IdxS0 = getelementptr inbounds double, double* %S, i64 0
-  %IdxS1 = getelementptr inbounds double, double* %S, i64 1
-  store double %add0, double *%IdxS0, align 8
-  store double %add1, double *%IdxS1, align 8
+  %IdxS1 = getelementptr inbounds double, ptr %S, i64 1
+  store double %add0, ptr %S, align 8
+  store double %add1, ptr %IdxS1, align 8
   ret void
 }
 
 ; This checks that we choose to group consecutive extracts from the same vectors.
-define void @ChecksExtractScores(double* %storeArray, double* %array, <2 x double> *%vecPtr1, <2 x double>* %vecPtr2) {
+define void @ChecksExtractScores(ptr %storeArray, ptr %array, ptr %vecPtr1, ptr %vecPtr2) {
 ; CHECK-LABEL: @ChecksExtractScores(
-; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1
-; CHECK-NEXT:    [[LOADA0:%.*]] = load double, double* [[IDX0]], align 4
-; CHECK-NEXT:    [[LOADA1:%.*]] = load double, double* [[IDX1]], align 4
-; CHECK-NEXT:    [[LOADVEC:%.*]] = load <2 x double>, <2 x double>* [[VECPTR1:%.*]], align 4
-; CHECK-NEXT:    [[LOADVEC2:%.*]] = load <2 x double>, <2 x double>* [[VECPTR2:%.*]], align 4
-; CHECK-NEXT:    [[SIDX0:%.*]] = getelementptr inbounds double, double* [[STOREARRAY:%.*]], i64 0
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, ptr [[ARRAY:%.*]], i64 1
+; CHECK-NEXT:    [[LOADA0:%.*]] = load double, ptr [[ARRAY]], align 4
+; CHECK-NEXT:    [[LOADA1:%.*]] = load double, ptr [[IDX1]], align 4
+; CHECK-NEXT:    [[LOADVEC:%.*]] = load <2 x double>, ptr [[VECPTR1:%.*]], align 4
+; CHECK-NEXT:    [[LOADVEC2:%.*]] = load <2 x double>, ptr [[VECPTR2:%.*]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> poison, double [[LOADA0]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x double> [[LOADVEC]], [[SHUFFLE]]
@@ -450,19 +399,17 @@ define void @ChecksExtractScores(double* %storeArray, double* %array, <2 x doubl
 ; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[LOADVEC2]], [[SHUFFLE1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[SIDX0]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[STOREARRAY:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %idx0 = getelementptr inbounds double, double* %array, i64 0
-  %idx1 = getelementptr inbounds double, double* %array, i64 1
-  %loadA0 = load double, double* %idx0, align 4
-  %loadA1 = load double, double* %idx1, align 4
+  %idx1 = getelementptr inbounds double, ptr %array, i64 1
+  %loadA0 = load double, ptr %array, align 4
+  %loadA1 = load double, ptr %idx1, align 4
 
-  %loadVec = load <2 x double>, <2 x double>* %vecPtr1, align 4
+  %loadVec = load <2 x double>, ptr %vecPtr1, align 4
   %extrA0 = extractelement <2 x double> %loadVec, i32 0
   %extrA1 = extractelement <2 x double> %loadVec, i32 1
-  %loadVec2 = load <2 x double>, <2 x double>* %vecPtr2, align 4
+  %loadVec2 = load <2 x double>, ptr %vecPtr2, align 4
   %extrB0 = extractelement <2 x double> %loadVec2, i32 0
   %extrB1 = extractelement <2 x double> %loadVec2, i32 1
 
@@ -473,10 +420,9 @@ define void @ChecksExtractScores(double* %storeArray, double* %array, <2 x doubl
   %add0 = fadd double %mul0, %mul3
   %add1 = fadd double %mul1, %mul4
 
-  %sidx0 = getelementptr inbounds double, double* %storeArray, i64 0
-  %sidx1 = getelementptr inbounds double, double* %storeArray, i64 1
-  store double %add0, double *%sidx0, align 8
-  store double %add1, double *%sidx1, align 8
+  %sidx1 = getelementptr inbounds double, ptr %storeArray, i64 1
+  store double %add0, ptr %storeArray, align 8
+  store double %add1, ptr %sidx1, align 8
   ret void
 }
 
@@ -625,21 +571,18 @@ define i1 @foo(float %a, float %b, float %c, <4 x float> %vec, i64 %idx2) {
 }
 
 ; Same as @ChecksExtractScores, but the extractelement vector operands do not match.
-define void @ChecksExtractScores_different_vectors(double* %storeArray, double* %array, <2 x double> *%vecPtr1, <2 x double>* %vecPtr2, <2 x double>* %vecPtr3, <2 x double>* %vecPtr4) {
+define void @ChecksExtractScores_different_vectors(ptr %storeArray, ptr %array, ptr %vecPtr1, ptr %vecPtr2, ptr %vecPtr3, ptr %vecPtr4) {
 ;
 ; SSE-LABEL: @ChecksExtractScores_different_vectors(
-; SSE-NEXT:    [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
-; SSE-NEXT:    [[LOADVEC:%.*]] = load <2 x double>, <2 x double>* [[VECPTR1:%.*]], align 4
-; SSE-NEXT:    [[LOADVEC2:%.*]] = load <2 x double>, <2 x double>* [[VECPTR2:%.*]], align 4
+; SSE-NEXT:    [[LOADVEC:%.*]] = load <2 x double>, ptr [[VECPTR1:%.*]], align 4
+; SSE-NEXT:    [[LOADVEC2:%.*]] = load <2 x double>, ptr [[VECPTR2:%.*]], align 4
 ; SSE-NEXT:    [[EXTRA0:%.*]] = extractelement <2 x double> [[LOADVEC]], i32 0
 ; SSE-NEXT:    [[EXTRA1:%.*]] = extractelement <2 x double> [[LOADVEC2]], i32 1
-; SSE-NEXT:    [[LOADVEC3:%.*]] = load <2 x double>, <2 x double>* [[VECPTR3:%.*]], align 4
-; SSE-NEXT:    [[LOADVEC4:%.*]] = load <2 x double>, <2 x double>* [[VECPTR4:%.*]], align 4
+; SSE-NEXT:    [[LOADVEC3:%.*]] = load <2 x double>, ptr [[VECPTR3:%.*]], align 4
+; SSE-NEXT:    [[LOADVEC4:%.*]] = load <2 x double>, ptr [[VECPTR4:%.*]], align 4
 ; SSE-NEXT:    [[EXTRB0:%.*]] = extractelement <2 x double> [[LOADVEC3]], i32 0
 ; SSE-NEXT:    [[EXTRB1:%.*]] = extractelement <2 x double> [[LOADVEC4]], i32 1
-; SSE-NEXT:    [[SIDX0:%.*]] = getelementptr inbounds double, double* [[STOREARRAY:%.*]], i64 0
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[ARRAY:%.*]], align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[EXTRA1]], i32 0
 ; SSE-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[EXTRB0]], i32 1
 ; SSE-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], [[TMP2]]
@@ -648,24 +591,21 @@ define void @ChecksExtractScores_different_vectors(double* %storeArray, double*
 ; SSE-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[EXTRB1]], i32 1
 ; SSE-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP2]]
 ; SSE-NEXT:    [[TMP9:%.*]] = fadd <2 x double> [[SHUFFLE]], [[TMP8]]
-; SSE-NEXT:    [[TMP10:%.*]] = bitcast double* [[SIDX0]] to <2 x double>*
-; SSE-NEXT:    store <2 x double> [[TMP9]], <2 x double>* [[TMP10]], align 8
+; SSE-NEXT:    store <2 x double> [[TMP9]], ptr [[STOREARRAY:%.*]], align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @ChecksExtractScores_different_vectors(
-; AVX-NEXT:    [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
-; AVX-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1
-; AVX-NEXT:    [[LOADA0:%.*]] = load double, double* [[IDX0]], align 4
-; AVX-NEXT:    [[LOADA1:%.*]] = load double, double* [[IDX1]], align 4
-; AVX-NEXT:    [[LOADVEC:%.*]] = load <2 x double>, <2 x double>* [[VECPTR1:%.*]], align 4
-; AVX-NEXT:    [[LOADVEC2:%.*]] = load <2 x double>, <2 x double>* [[VECPTR2:%.*]], align 4
+; AVX-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, ptr [[ARRAY:%.*]], i64 1
+; AVX-NEXT:    [[LOADA0:%.*]] = load double, ptr [[ARRAY]], align 4
+; AVX-NEXT:    [[LOADA1:%.*]] = load double, ptr [[IDX1]], align 4
+; AVX-NEXT:    [[LOADVEC:%.*]] = load <2 x double>, ptr [[VECPTR1:%.*]], align 4
+; AVX-NEXT:    [[LOADVEC2:%.*]] = load <2 x double>, ptr [[VECPTR2:%.*]], align 4
 ; AVX-NEXT:    [[EXTRA0:%.*]] = extractelement <2 x double> [[LOADVEC]], i32 0
 ; AVX-NEXT:    [[EXTRA1:%.*]] = extractelement <2 x double> [[LOADVEC2]], i32 1
-; AVX-NEXT:    [[LOADVEC3:%.*]] = load <2 x double>, <2 x double>* [[VECPTR3:%.*]], align 4
-; AVX-NEXT:    [[LOADVEC4:%.*]] = load <2 x double>, <2 x double>* [[VECPTR4:%.*]], align 4
+; AVX-NEXT:    [[LOADVEC3:%.*]] = load <2 x double>, ptr [[VECPTR3:%.*]], align 4
+; AVX-NEXT:    [[LOADVEC4:%.*]] = load <2 x double>, ptr [[VECPTR4:%.*]], align 4
 ; AVX-NEXT:    [[EXTRB0:%.*]] = extractelement <2 x double> [[LOADVEC3]], i32 0
 ; AVX-NEXT:    [[EXTRB1:%.*]] = extractelement <2 x double> [[LOADVEC4]], i32 1
-; AVX-NEXT:    [[SIDX0:%.*]] = getelementptr inbounds double, double* [[STOREARRAY:%.*]], i64 0
 ; AVX-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> poison, double [[EXTRA0]], i32 0
 ; AVX-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[EXTRA1]], i32 1
 ; AVX-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[LOADA0]], i32 0
@@ -677,21 +617,19 @@ define void @ChecksExtractScores_different_vectors(double* %storeArray, double*
 ; AVX-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <2 x i32> zeroinitializer
 ; AVX-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP6]], [[SHUFFLE1]]
 ; AVX-NEXT:    [[TMP9:%.*]] = fadd <2 x double> [[TMP4]], [[TMP8]]
-; AVX-NEXT:    [[TMP10:%.*]] = bitcast double* [[SIDX0]] to <2 x double>*
-; AVX-NEXT:    store <2 x double> [[TMP9]], <2 x double>* [[TMP10]], align 8
+; AVX-NEXT:    store <2 x double> [[TMP9]], ptr [[STOREARRAY:%.*]], align 8
 ; AVX-NEXT:    ret void
 ;
-  %idx0 = getelementptr inbounds double, double* %array, i64 0
-  %idx1 = getelementptr inbounds double, double* %array, i64 1
-  %loadA0 = load double, double* %idx0, align 4
-  %loadA1 = load double, double* %idx1, align 4
+  %idx1 = getelementptr inbounds double, ptr %array, i64 1
+  %loadA0 = load double, ptr %array, align 4
+  %loadA1 = load double, ptr %idx1, align 4
 
-  %loadVec = load <2 x double>, <2 x double>* %vecPtr1, align 4
-  %loadVec2 = load <2 x double>, <2 x double>* %vecPtr2, align 4
+  %loadVec = load <2 x double>, ptr %vecPtr1, align 4
+  %loadVec2 = load <2 x double>, ptr %vecPtr2, align 4
   %extrA0 = extractelement <2 x double> %loadVec, i32 0
   %extrA1 = extractelement <2 x double> %loadVec2, i32 1
-  %loadVec3= load <2 x double>, <2 x double>* %vecPtr3, align 4
-  %loadVec4 = load <2 x double>, <2 x double>* %vecPtr4, align 4
+  %loadVec3= load <2 x double>, ptr %vecPtr3, align 4
+  %loadVec4 = load <2 x double>, ptr %vecPtr4, align 4
   %extrB0 = extractelement <2 x double> %loadVec3, i32 0
   %extrB1 = extractelement <2 x double> %loadVec4, i32 1
 
@@ -702,24 +640,19 @@ define void @ChecksExtractScores_different_vectors(double* %storeArray, double*
   %add0 = fadd double %mul0, %mul3
   %add1 = fadd double %mul1, %mul4
 
-  %sidx0 = getelementptr inbounds double, double* %storeArray, i64 0
-  %sidx1 = getelementptr inbounds double, double* %storeArray, i64 1
-  store double %add0, double *%sidx0, align 8
-  store double %add1, double *%sidx1, align 8
+  %sidx1 = getelementptr inbounds double, ptr %storeArray, i64 1
+  store double %add0, ptr %storeArray, align 8
+  store double %add1, ptr %sidx1, align 8
   ret void
 }
 
 ; This checks that we prefer splats rather than reverse load vectors + shuffles.
 ; 2-wide splat loads in x86 use a single instruction so they are quite cheap.
-define double @splat_loads(double *%array1, double *%array2, double *%ptrA, double *%ptrB) {
+define double @splat_loads(ptr %array1, ptr %array2, ptr %ptrA, ptr %ptrB) {
 ; SSE-LABEL: @splat_loads(
 ; SSE-NEXT:  entry:
-; SSE-NEXT:    [[GEP_1_0:%.*]] = getelementptr inbounds double, double* [[ARRAY1:%.*]], i64 0
-; SSE-NEXT:    [[GEP_2_0:%.*]] = getelementptr inbounds double, double* [[ARRAY2:%.*]], i64 0
-; SSE-NEXT:    [[TMP0:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>*
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; SSE-NEXT:    [[TMP2:%.*]] = bitcast double* [[GEP_2_0]] to <2 x double>*
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY1:%.*]], align 8
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAY2:%.*]], align 8
 ; SSE-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
 ; SSE-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
 ; SSE-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
@@ -731,13 +664,10 @@ define double @splat_loads(double *%array1, double *%array2, double *%ptrA, doub
 ;
 ; AVX-LABEL: @splat_loads(
 ; AVX-NEXT:  entry:
-; AVX-NEXT:    [[GEP_1_0:%.*]] = getelementptr inbounds double, double* [[ARRAY1:%.*]], i64 0
-; AVX-NEXT:    [[GEP_2_0:%.*]] = getelementptr inbounds double, double* [[ARRAY2:%.*]], i64 0
-; AVX-NEXT:    [[GEP_2_1:%.*]] = getelementptr inbounds double, double* [[ARRAY2]], i64 1
-; AVX-NEXT:    [[LD_2_0:%.*]] = load double, double* [[GEP_2_0]], align 8
-; AVX-NEXT:    [[LD_2_1:%.*]] = load double, double* [[GEP_2_1]], align 8
-; AVX-NEXT:    [[TMP0:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>*
-; AVX-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
+; AVX-NEXT:    [[GEP_2_1:%.*]] = getelementptr inbounds double, ptr [[ARRAY2:%.*]], i64 1
+; AVX-NEXT:    [[LD_2_0:%.*]] = load double, ptr [[ARRAY2]], align 8
+; AVX-NEXT:    [[LD_2_1:%.*]] = load double, ptr [[GEP_2_1]], align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY1:%.*]], align 8
 ; AVX-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[LD_2_0]], i32 0
 ; AVX-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
 ; AVX-NEXT:    [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
@@ -751,15 +681,13 @@ define double @splat_loads(double *%array1, double *%array2, double *%ptrA, doub
 ; AVX-NEXT:    ret double [[ADD3]]
 ;
 entry:
-  %gep_1_0 = getelementptr inbounds double, double* %array1, i64 0
-  %gep_1_1 = getelementptr inbounds double, double* %array1, i64 1
-  %ld_1_0 = load double, double* %gep_1_0, align 8
-  %ld_1_1 = load double, double* %gep_1_1, align 8
+  %gep_1_1 = getelementptr inbounds double, ptr %array1, i64 1
+  %ld_1_0 = load double, ptr %array1, align 8
+  %ld_1_1 = load double, ptr %gep_1_1, align 8
 
-  %gep_2_0 = getelementptr inbounds double, double* %array2, i64 0
-  %gep_2_1 = getelementptr inbounds double, double* %array2, i64 1
-  %ld_2_0 = load double, double* %gep_2_0, align 8
-  %ld_2_1 = load double, double* %gep_2_1, align 8
+  %gep_2_1 = getelementptr inbounds double, ptr %array2, i64 1
+  %ld_2_0 = load double, ptr %array2, align 8
+  %ld_2_1 = load double, ptr %gep_2_1, align 8
 
   %mul1 = fmul double %ld_1_0, %ld_2_0
   %mul2 = fmul double %ld_1_1, %ld_2_0
@@ -776,15 +704,11 @@ entry:
 
 
 ; Same as splat_loads() but the splat load has internal uses in the slp graph.
-define double @splat_loads_with_internal_uses(double *%array1, double *%array2, double *%ptrA, double *%ptrB) {
+define double @splat_loads_with_internal_uses(ptr %array1, ptr %array2, ptr %ptrA, ptr %ptrB) {
 ; SSE-LABEL: @splat_loads_with_internal_uses(
 ; SSE-NEXT:  entry:
-; SSE-NEXT:    [[GEP_1_0:%.*]] = getelementptr inbounds double, double* [[ARRAY1:%.*]], i64 0
-; SSE-NEXT:    [[GEP_2_0:%.*]] = getelementptr inbounds double, double* [[ARRAY2:%.*]], i64 0
-; SSE-NEXT:    [[TMP0:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>*
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; SSE-NEXT:    [[TMP2:%.*]] = bitcast double* [[GEP_2_0]] to <2 x double>*
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY1:%.*]], align 8
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAY2:%.*]], align 8
 ; SSE-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
 ; SSE-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
 ; SSE-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
@@ -798,13 +722,10 @@ define double @splat_loads_with_internal_uses(double *%array1, double *%array2,
 ;
 ; AVX-LABEL: @splat_loads_with_internal_uses(
 ; AVX-NEXT:  entry:
-; AVX-NEXT:    [[GEP_1_0:%.*]] = getelementptr inbounds double, double* [[ARRAY1:%.*]], i64 0
-; AVX-NEXT:    [[GEP_2_0:%.*]] = getelementptr inbounds double, double* [[ARRAY2:%.*]], i64 0
-; AVX-NEXT:    [[GEP_2_1:%.*]] = getelementptr inbounds double, double* [[ARRAY2]], i64 1
-; AVX-NEXT:    [[LD_2_0:%.*]] = load double, double* [[GEP_2_0]], align 8
-; AVX-NEXT:    [[LD_2_1:%.*]] = load double, double* [[GEP_2_1]], align 8
-; AVX-NEXT:    [[TMP0:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>*
-; AVX-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
+; AVX-NEXT:    [[GEP_2_1:%.*]] = getelementptr inbounds double, ptr [[ARRAY2:%.*]], i64 1
+; AVX-NEXT:    [[LD_2_0:%.*]] = load double, ptr [[ARRAY2]], align 8
+; AVX-NEXT:    [[LD_2_1:%.*]] = load double, ptr [[GEP_2_1]], align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAY1:%.*]], align 8
 ; AVX-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[LD_2_0]], i32 0
 ; AVX-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
 ; AVX-NEXT:    [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
@@ -819,15 +740,13 @@ define double @splat_loads_with_internal_uses(double *%array1, double *%array2,
 ; AVX-NEXT:    ret double [[RES]]
 ;
 entry:
-  %gep_1_0 = getelementptr inbounds double, double* %array1, i64 0
-  %gep_1_1 = getelementptr inbounds double, double* %array1, i64 1
-  %ld_1_0 = load double, double* %gep_1_0, align 8
-  %ld_1_1 = load double, double* %gep_1_1, align 8
-
-  %gep_2_0 = getelementptr inbounds double, double* %array2, i64 0
-  %gep_2_1 = getelementptr inbounds double, double* %array2, i64 1
-  %ld_2_0 = load double, double* %gep_2_0, align 8
-  %ld_2_1 = load double, double* %gep_2_1, align 8
+  %gep_1_1 = getelementptr inbounds double, ptr %array1, i64 1
+  %ld_1_0 = load double, ptr %array1, align 8
+  %ld_1_1 = load double, ptr %gep_1_1, align 8
+
+  %gep_2_1 = getelementptr inbounds double, ptr %array2, i64 1
+  %ld_2_0 = load double, ptr %array2, align 8
+  %ld_2_1 = load double, ptr %gep_2_1, align 8
 
   %mul1 = fmul double %ld_1_0, %ld_2_0
   %mul2 = fmul double %ld_1_1, %ld_2_0

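Every hunk above follows the same mechanical pattern: with opaque pointers,
a zero-index getelementptr and a pointer bitcast are both no-ops, so they
fold away and the load or store uses the incoming ptr directly. A minimal
before/after sketch (the value names are illustrative, not taken from any
particular test):

  ; typed pointers
  %gep = getelementptr inbounds double, double* %p, i64 0
  %vp = bitcast double* %gep to <2 x double>*
  %v = load <2 x double>, <2 x double>* %vp, align 8

  ; opaque pointers: the GEP and the bitcast disappear
  %v = load <2 x double>, ptr %p, align 8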
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll b/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
index e7b1c9470fc65..42068c9bc5d49 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
@@ -4,21 +4,19 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-define i32 @foo(i32* nocapture %A, i32 %n) {
+define i32 @foo(ptr nocapture %A, i32 %n) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP62:%.*]] = icmp sgt i32 [[N:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP62]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX]] to <8 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[N]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <8 x i32> [[TMP1]], [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[ARRAYIDX]] to <8 x i32>*
-; CHECK-NEXT:    store <8 x i32> [[TMP3]], <8 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    store <8 x i32> [[TMP3]], ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 8
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP5]], [[N]]
@@ -32,45 +30,45 @@ entry:
 
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %add1 = add nsw i32 %0, %n
-  store i32 %add1, i32* %arrayidx, align 4
+  store i32 %add1, ptr %arrayidx, align 4
   %1 = or i64 %indvars.iv, 1
-  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %1
-  %2 = load i32, i32* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %A, i64 %1
+  %2 = load i32, ptr %arrayidx4, align 4
   %add5 = add nsw i32 %2, %n
-  store i32 %add5, i32* %arrayidx4, align 4
+  store i32 %add5, ptr %arrayidx4, align 4
   %3 = or i64 %indvars.iv, 2
-  %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 %3
-  %4 = load i32, i32* %arrayidx8, align 4
+  %arrayidx8 = getelementptr inbounds i32, ptr %A, i64 %3
+  %4 = load i32, ptr %arrayidx8, align 4
   %add9 = add nsw i32 %4, %n
-  store i32 %add9, i32* %arrayidx8, align 4
+  store i32 %add9, ptr %arrayidx8, align 4
   %5 = or i64 %indvars.iv, 3
-  %arrayidx12 = getelementptr inbounds i32, i32* %A, i64 %5
-  %6 = load i32, i32* %arrayidx12, align 4
+  %arrayidx12 = getelementptr inbounds i32, ptr %A, i64 %5
+  %6 = load i32, ptr %arrayidx12, align 4
   %add13 = add nsw i32 %6, %n
-  store i32 %add13, i32* %arrayidx12, align 4
+  store i32 %add13, ptr %arrayidx12, align 4
   %7 = or i64 %indvars.iv, 4
-  %arrayidx16 = getelementptr inbounds i32, i32* %A, i64 %7
-  %8 = load i32, i32* %arrayidx16, align 4
+  %arrayidx16 = getelementptr inbounds i32, ptr %A, i64 %7
+  %8 = load i32, ptr %arrayidx16, align 4
   %add17 = add nsw i32 %8, %n
-  store i32 %add17, i32* %arrayidx16, align 4
+  store i32 %add17, ptr %arrayidx16, align 4
   %9 = or i64 %indvars.iv, 5
-  %arrayidx20 = getelementptr inbounds i32, i32* %A, i64 %9
-  %10 = load i32, i32* %arrayidx20, align 4
+  %arrayidx20 = getelementptr inbounds i32, ptr %A, i64 %9
+  %10 = load i32, ptr %arrayidx20, align 4
   %add21 = add nsw i32 %10, %n
-  store i32 %add21, i32* %arrayidx20, align 4
+  store i32 %add21, ptr %arrayidx20, align 4
   %11 = or i64 %indvars.iv, 6
-  %arrayidx24 = getelementptr inbounds i32, i32* %A, i64 %11
-  %12 = load i32, i32* %arrayidx24, align 4
+  %arrayidx24 = getelementptr inbounds i32, ptr %A, i64 %11
+  %12 = load i32, ptr %arrayidx24, align 4
   %add25 = add nsw i32 %12, %n
-  store i32 %add25, i32* %arrayidx24, align 4
+  store i32 %add25, ptr %arrayidx24, align 4
   %13 = or i64 %indvars.iv, 7
-  %arrayidx28 = getelementptr inbounds i32, i32* %A, i64 %13
-  %14 = load i32, i32* %arrayidx28, align 4
+  %arrayidx28 = getelementptr inbounds i32, ptr %A, i64 %13
+  %14 = load i32, ptr %arrayidx28, align 4
   %add29 = add nsw i32 %14, %n
-  store i32 %add29, i32* %arrayidx28, align 4
+  store i32 %add29, ptr %arrayidx28, align 4
   %indvars.iv.next = add i64 %indvars.iv, 8
   %15 = trunc i64 %indvars.iv.next to i32
   %cmp = icmp slt i32 %15, %n

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll
index 1e86c98908252..5baa8bca154a8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll
@@ -43,7 +43,7 @@ bb1:                                              ; preds = %bb1, %bb
   br label %bb1
 }
 
-define void @test_2(i8 addrspace(1)* %arg, i32 %arg1) #0 {
+define void @test_2(ptr addrspace(1) %arg, i32 %arg1) #0 {
 ; CHECK-LABEL: @test_2(
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    br label [[BB2:%.*]]

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll b/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll
index 1db76c3b81468..d417dd275d62f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll
@@ -1,69 +1,69 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -aa-pipeline='basic-aa,scoped-noalias-aa' -passes=slp-vectorizer -mtriple=x86_64-apple-darwin -S %s | FileCheck %s
 
-define void @version_multiple(i32* nocapture %out_block, i32* nocapture readonly %counter) {
+define void @version_multiple(ptr nocapture %out_block, ptr nocapture readonly %counter) {
 ; CHECK-LABEL: @version_multiple(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[COUNTER:%.*]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[OUT_BLOCK:%.*]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COUNTER:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[OUT_BLOCK:%.*]], align 4
 ; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[TMP1]], [[TMP0]]
-; CHECK-NEXT:    store i32 [[XOR]], i32* [[OUT_BLOCK]], align 4
-; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 1
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT:    store i32 [[XOR]], ptr [[OUT_BLOCK]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[COUNTER]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, ptr [[OUT_BLOCK]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX2_1]], align 4
 ; CHECK-NEXT:    [[XOR_1:%.*]] = xor i32 [[TMP3]], [[TMP2]]
-; CHECK-NEXT:    store i32 [[XOR_1]], i32* [[ARRAYIDX2_1]], align 4
-; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 2
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 2
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT:    store i32 [[XOR_1]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[COUNTER]], i64 2
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, ptr [[OUT_BLOCK]], i64 2
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX2_2]], align 4
 ; CHECK-NEXT:    [[XOR_2:%.*]] = xor i32 [[TMP5]], [[TMP4]]
-; CHECK-NEXT:    store i32 [[XOR_2]], i32* [[ARRAYIDX2_2]], align 4
-; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 3
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 3
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT:    store i32 [[XOR_2]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[COUNTER]], i64 3
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, ptr [[OUT_BLOCK]], i64 3
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX2_3]], align 4
 ; CHECK-NEXT:    [[XOR_3:%.*]] = xor i32 [[TMP7]], [[TMP6]]
-; CHECK-NEXT:    store i32 [[XOR_3]], i32* [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT:    store i32 [[XOR_3]], ptr [[ARRAYIDX2_3]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load i32, i32* %counter, align 4
-  %1 = load i32, i32* %out_block, align 4
+  %0 = load i32, ptr %counter, align 4
+  %1 = load i32, ptr %out_block, align 4
   %xor = xor i32 %1, %0
-  store i32 %xor, i32* %out_block, align 4
-  %arrayidx.1 = getelementptr inbounds i32, i32* %counter, i64 1
-  %2 = load i32, i32* %arrayidx.1, align 4
-  %arrayidx2.1 = getelementptr inbounds i32, i32* %out_block, i64 1
-  %3 = load i32, i32* %arrayidx2.1, align 4
+  store i32 %xor, ptr %out_block, align 4
+  %arrayidx.1 = getelementptr inbounds i32, ptr %counter, i64 1
+  %2 = load i32, ptr %arrayidx.1, align 4
+  %arrayidx2.1 = getelementptr inbounds i32, ptr %out_block, i64 1
+  %3 = load i32, ptr %arrayidx2.1, align 4
   %xor.1 = xor i32 %3, %2
-  store i32 %xor.1, i32* %arrayidx2.1, align 4
-  %arrayidx.2 = getelementptr inbounds i32, i32* %counter, i64 2
-  %4 = load i32, i32* %arrayidx.2, align 4
-  %arrayidx2.2 = getelementptr inbounds i32, i32* %out_block, i64 2
-  %5 = load i32, i32* %arrayidx2.2, align 4
+  store i32 %xor.1, ptr %arrayidx2.1, align 4
+  %arrayidx.2 = getelementptr inbounds i32, ptr %counter, i64 2
+  %4 = load i32, ptr %arrayidx.2, align 4
+  %arrayidx2.2 = getelementptr inbounds i32, ptr %out_block, i64 2
+  %5 = load i32, ptr %arrayidx2.2, align 4
   %xor.2 = xor i32 %5, %4
-  store i32 %xor.2, i32* %arrayidx2.2, align 4
-  %arrayidx.3 = getelementptr inbounds i32, i32* %counter, i64 3
-  %6 = load i32, i32* %arrayidx.3, align 4
-  %arrayidx2.3 = getelementptr inbounds i32, i32* %out_block, i64 3
-  %7 = load i32, i32* %arrayidx2.3, align 4
+  store i32 %xor.2, ptr %arrayidx2.2, align 4
+  %arrayidx.3 = getelementptr inbounds i32, ptr %counter, i64 3
+  %6 = load i32, ptr %arrayidx.3, align 4
+  %arrayidx2.3 = getelementptr inbounds i32, ptr %out_block, i64 3
+  %7 = load i32, ptr %arrayidx2.3, align 4
   %xor.3 = xor i32 %7, %6
-  store i32 %xor.3, i32* %arrayidx2.3, align 4
+  store i32 %xor.3, ptr %arrayidx2.3, align 4
   ret void
 }
 
 declare void @use(<8 x float>)
-define void @delete_pointer_bound(float* %a, float* %b, i1 %c) #0 {
+define void @delete_pointer_bound(ptr %a, ptr %b, i1 %c) #0 {
 ; CHECK-LABEL: @delete_pointer_bound(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[B_10:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 10
-; CHECK-NEXT:    [[B_14:%.*]] = getelementptr inbounds float, float* [[B]], i64 14
+; CHECK-NEXT:    [[B_10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 10
+; CHECK-NEXT:    [[B_14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 14
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
 ; CHECK:       else:
-; CHECK-NEXT:    [[L0:%.*]] = load float, float* [[B_10]], align 4
-; CHECK-NEXT:    [[L1:%.*]] = load float, float* [[B_14]], align 4
+; CHECK-NEXT:    [[L0:%.*]] = load float, ptr [[B_10]], align 4
+; CHECK-NEXT:    [[L1:%.*]] = load float, ptr [[B_14]], align 4
 ; CHECK-NEXT:    [[I2:%.*]] = insertelement <8 x float> undef, float [[L0]], i32 2
 ; CHECK-NEXT:    [[I3:%.*]] = insertelement <8 x float> [[I2]], float [[L0]], i32 3
 ; CHECK-NEXT:    [[I4:%.*]] = insertelement <8 x float> [[I3]], float [[L1]], i32 4
@@ -71,24 +71,23 @@ define void @delete_pointer_bound(float* %a, float* %b, i1 %c) #0 {
 ; CHECK-NEXT:    call void @use(<8 x float> [[I7]])
 ; CHECK-NEXT:    ret void
 ; CHECK:       then:
-; CHECK-NEXT:    [[A_8:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 8
-; CHECK-NEXT:    store float 0.000000e+00, float* [[A_8]], align 4
-; CHECK-NEXT:    [[L6:%.*]] = load float, float* [[B_14]], align 4
-; CHECK-NEXT:    [[A_5:%.*]] = getelementptr inbounds float, float* [[A]], i64 5
-; CHECK-NEXT:    store float [[L6]], float* [[A_5]], align 4
-; CHECK-NEXT:    [[A_6:%.*]] = getelementptr inbounds float, float* [[A]], i64 6
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[A_6]] to <2 x float>*
-; CHECK-NEXT:    store <2 x float> zeroinitializer, <2 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[A_8:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 8
+; CHECK-NEXT:    store float 0.000000e+00, ptr [[A_8]], align 4
+; CHECK-NEXT:    [[L6:%.*]] = load float, ptr [[B_14]], align 4
+; CHECK-NEXT:    [[A_5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 5
+; CHECK-NEXT:    store float [[L6]], ptr [[A_5]], align 4
+; CHECK-NEXT:    [[A_6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 6
+; CHECK-NEXT:    store <2 x float> zeroinitializer, ptr [[A_6]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %b.10 = getelementptr inbounds float, float* %b, i64 10
-  %b.14 = getelementptr inbounds float, float* %b, i64 14
+  %b.10 = getelementptr inbounds float, ptr %b, i64 10
+  %b.14 = getelementptr inbounds float, ptr %b, i64 14
   br i1 %c, label %then, label %else
 
 else:
-  %l0 = load float, float* %b.10, align 4
-  %l1 = load float, float* %b.14, align 4
+  %l0 = load float, ptr %b.10, align 4
+  %l1 = load float, ptr %b.14, align 4
   %i2 = insertelement <8 x float> undef, float %l0, i32 2
   %i3 = insertelement <8 x float> %i2, float %l0, i32 3
   %i4 = insertelement <8 x float> %i3, float %l1, i32 4
@@ -97,27 +96,27 @@ else:
   ret void
 
 then:
-  %a.8 = getelementptr inbounds float, float* %a, i64 8
-  store float 0.0, float* %a.8, align 4
-  %l6 = load float, float* %b.14, align 4
-  %a.5 = getelementptr inbounds float, float* %a, i64 5
-  store float %l6, float* %a.5, align 4
-  %a.6 = getelementptr inbounds float, float* %a, i64 6
-  store float 0.0, float* %a.6, align 4
-  %a.7 = getelementptr inbounds float, float* %a, i64 7
-  store float 0.0, float* %a.7, align 4
+  %a.8 = getelementptr inbounds float, ptr %a, i64 8
+  store float 0.0, ptr %a.8, align 4
+  %l6 = load float, ptr %b.14, align 4
+  %a.5 = getelementptr inbounds float, ptr %a, i64 5
+  store float %l6, ptr %a.5, align 4
+  %a.6 = getelementptr inbounds float, ptr %a, i64 6
+  store float 0.0, ptr %a.6, align 4
+  %a.7 = getelementptr inbounds float, ptr %a, i64 7
+  store float 0.0, ptr %a.7, align 4
   ret void
 }
 
-%struct.zot = type { i16, i16, i16, i32, float, float, float, %struct.quux*, %struct.zot*, %struct.wombat*, %struct.wombat.0 }
-%struct.quux = type { i16, %struct.quux*, %struct.quux* }
-%struct.wombat = type { i32, i16, i8, i8, %struct.eggs* }
+%struct.zot = type { i16, i16, i16, i32, float, float, float, ptr, ptr, ptr, %struct.wombat.0 }
+%struct.quux = type { i16, ptr, ptr }
+%struct.wombat = type { i32, i16, i8, i8, ptr }
 %struct.eggs = type { float, i8, %struct.ham }
 %struct.ham = type { [2 x double], [8 x i8] }
 %struct.wombat.0 = type { %struct.bar }
-%struct.bar = type { [3 x double], [3 x double], double, double, i16, [3 x double]*, i32, [3 x double] }
+%struct.bar = type { [3 x double], [3 x double], double, double, i16, ptr, i32, [3 x double] }
 
-define double @preserve_loop_info(%struct.zot* %arg) {
+define double @preserve_loop_info(ptr %arg) {
 ; CHECK-LABEL: @preserve_loop_info(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca [3 x double], align 16
@@ -129,22 +128,21 @@ define double @preserve_loop_info(%struct.zot* %arg) {
 ; CHECK:       outer.latch:
 ; CHECK-NEXT:    br i1 undef, label [[BB:%.*]], label [[OUTER_HEADER]]
 ; CHECK:       bb:
-; CHECK-NEXT:    [[TMP5:%.*]] = load [3 x double]*, [3 x double]** undef, align 8
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x double], [3 x double]* [[TMP]], i64 0, i64 0
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x double], [3 x double]* [[TMP]], i64 0, i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr undef, align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x double], ptr [[TMP]], i64 0, i64 1
 ; CHECK-NEXT:    br label [[LOOP_3HEADER:%.*]]
 ; CHECK:       loop.3header:
 ; CHECK-NEXT:    br i1 undef, label [[LOOP_3LATCH:%.*]], label [[BB9:%.*]]
 ; CHECK:       bb9:
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x double], [3 x double]* [[TMP5]], i64 undef, i64 1
-; CHECK-NEXT:    store double undef, double* [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP12:%.*]] = load double, double* [[TMP10]], align 8
-; CHECK-NEXT:    store double [[TMP12]], double* [[TMP7]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x double], ptr [[TMP5]], i64 undef, i64 1
+; CHECK-NEXT:    store double undef, ptr [[TMP]], align 16
+; CHECK-NEXT:    [[TMP12:%.*]] = load double, ptr [[TMP10]], align 8
+; CHECK-NEXT:    store double [[TMP12]], ptr [[TMP7]], align 8
 ; CHECK-NEXT:    br label [[LOOP_3LATCH]]
 ; CHECK:       loop.3latch:
 ; CHECK-NEXT:    br i1 undef, label [[BB14:%.*]], label [[LOOP_3HEADER]]
 ; CHECK:       bb14:
-; CHECK-NEXT:    [[TMP15:%.*]] = call double undef(double* [[TMP6]], %struct.zot* [[ARG:%.*]])
+; CHECK-NEXT:    [[TMP15:%.*]] = call double undef(ptr [[TMP]], ptr [[ARG:%.*]])
 ; CHECK-NEXT:    ret double undef
 ;
 entry:
@@ -161,41 +159,39 @@ outer.latch:                                              ; preds = %bb16
   br i1 undef, label %bb, label %outer.header
 
 bb:                                              ; preds = %bb3
-  %tmp5 = load [3 x double]*, [3 x double]** undef, align 8
-  %tmp6 = getelementptr inbounds [3 x double], [3 x double]* %tmp, i64 0, i64 0
-  %tmp7 = getelementptr inbounds [3 x double], [3 x double]* %tmp, i64 0, i64 1
+  %tmp5 = load ptr, ptr undef, align 8
+  %tmp7 = getelementptr inbounds [3 x double], ptr %tmp, i64 0, i64 1
   br label %loop.3header
 
 loop.3header:                                              ; preds = %bb13, %bb4
   br i1 undef, label %loop.3latch, label %bb9
 
 bb9:                                              ; preds = %bb8
-  %tmp10 = getelementptr inbounds [3 x double], [3 x double]* %tmp5, i64 undef, i64 1
-  store double undef, double* %tmp6, align 16
-  %tmp12 = load double, double* %tmp10, align 8
-  store double %tmp12, double* %tmp7, align 8
+  %tmp10 = getelementptr inbounds [3 x double], ptr %tmp5, i64 undef, i64 1
+  store double undef, ptr %tmp, align 16
+  %tmp12 = load double, ptr %tmp10, align 8
+  store double %tmp12, ptr %tmp7, align 8
   br label %loop.3latch
 
 loop.3latch:                                             ; preds = %bb11, %bb8
   br i1 undef, label %bb14, label %loop.3header
 
 bb14:                                             ; preds = %bb13
-  %tmp15 = call double undef(double* %tmp6, %struct.zot* %arg)
+  %tmp15 = call double undef(ptr %tmp, ptr %arg)
   ret double undef
 }
 
-define void @gather_sequence_crash(<2 x float> %arg, float* %arg1, float %arg2, float* %arg3, float* %arg4, float* %arg5, i1 %c.1, i1 %c.2) {
+define void @gather_sequence_crash(<2 x float> %arg, ptr %arg1, float %arg2, ptr %arg3, ptr %arg4, ptr %arg5, i1 %c.1, i1 %c.2) {
 ; CHECK-LABEL: @gather_sequence_crash(
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    br i1 [[C_1:%.*]], label [[BB16:%.*]], label [[BB6:%.*]]
 ; CHECK:       bb6:
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[ARG1:%.*]], i32 3
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[ARG1:%.*]], i32 3
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x float> <float poison, float poison, float poison, float 0.000000e+00>, float [[ARG2:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> [[ARG:%.*]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 4, i32 5, i32 3>
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[TMP8]], align 4
 ; CHECK-NEXT:    ret void
 ; CHECK:       bb16:
 ; CHECK-NEXT:    br label [[BB17:%.*]]
@@ -210,38 +206,33 @@ define void @gather_sequence_crash(<2 x float> %arg, float* %arg1, float %arg2,
 ; CHECK:       bb21:
 ; CHECK-NEXT:    br label [[BB22:%.*]]
 ; CHECK:       bb22:
-; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds float, float* [[ARG4:%.*]], i32 0
-; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr float, float* [[TMP23]], i64 7
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr float, ptr [[ARG4:%.*]], i64 7
 ; CHECK-NEXT:    br i1 [[C_2:%.*]], label [[BB25:%.*]], label [[BB22]]
 ; CHECK:       bb25:
-; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr float, float* [[TMP23]], i64 6
-; CHECK-NEXT:    store float 0.000000e+00, float* [[TMP24]], align 4
-; CHECK-NEXT:    [[TMP27:%.*]] = load float, float* [[ARG5:%.*]], align 4
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr float, ptr [[ARG4]], i64 6
+; CHECK-NEXT:    store float 0.000000e+00, ptr [[TMP24]], align 4
+; CHECK-NEXT:    [[TMP27:%.*]] = load float, ptr [[ARG5:%.*]], align 4
 ; CHECK-NEXT:    [[TMP29:%.*]] = fadd float 0.000000e+00, 0.000000e+00
-; CHECK-NEXT:    store float 0.000000e+00, float* [[TMP26]], align 4
-; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr float, float* [[TMP23]], i64 4
+; CHECK-NEXT:    store float 0.000000e+00, ptr [[TMP26]], align 4
+; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr float, ptr [[ARG4]], i64 4
 ; CHECK-NEXT:    [[TMP31:%.*]] = fadd float 0.000000e+00, 0.000000e+00
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast float* [[TMP30]] to <2 x float>*
-; CHECK-NEXT:    store <2 x float> zeroinitializer, <2 x float>* [[TMP5]], align 4
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, float* [[ARG4]], i32 0
+; CHECK-NEXT:    store <2 x float> zeroinitializer, ptr [[TMP30]], align 4
 ; CHECK-NEXT:    br label [[BB33:%.*]]
 ; CHECK:       bb33:
 ; CHECK-NEXT:    br label [[BB34:%.*]]
 ; CHECK:       bb34:
-; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr float, float* [[TMP32]], i64 3
-; CHECK-NEXT:    [[TMP37:%.*]] = load float, float* [[TMP35]], align 4
+; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr float, ptr [[ARG4]], i64 3
+; CHECK-NEXT:    [[TMP37:%.*]] = load float, ptr [[TMP35]], align 4
 ; CHECK-NEXT:    [[TMP38:%.*]] = fadd float 0.000000e+00, [[TMP37]]
-; CHECK-NEXT:    store float [[TMP38]], float* [[TMP35]], align 4
-; CHECK-NEXT:    [[TMP39:%.*]] = getelementptr float, float* [[TMP32]], i64 1
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[TMP39]] to <2 x float>*
-; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[TMP6]], align 4
+; CHECK-NEXT:    store float [[TMP38]], ptr [[TMP35]], align 4
+; CHECK-NEXT:    [[TMP39:%.*]] = getelementptr float, ptr [[ARG4]], i64 1
+; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x float>, ptr [[TMP39]], align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = fadd <2 x float> zeroinitializer, [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP39]] to <2 x float>*
-; CHECK-NEXT:    store <2 x float> [[TMP8]], <2 x float>* [[TMP9]], align 4
-; CHECK-NEXT:    [[TMP44:%.*]] = load float, float* [[ARG3:%.*]], align 4
-; CHECK-NEXT:    [[TMP45:%.*]] = load float, float* [[TMP32]], align 4
+; CHECK-NEXT:    store <2 x float> [[TMP8]], ptr [[TMP39]], align 4
+; CHECK-NEXT:    [[TMP44:%.*]] = load float, ptr [[ARG3:%.*]], align 4
+; CHECK-NEXT:    [[TMP45:%.*]] = load float, ptr [[ARG4]], align 4
 ; CHECK-NEXT:    [[TMP46:%.*]] = fadd float 0.000000e+00, [[TMP45]]
-; CHECK-NEXT:    store float [[TMP46]], float* [[TMP32]], align 4
+; CHECK-NEXT:    store float [[TMP46]], ptr [[ARG4]], align 4
 ; CHECK-NEXT:    call void @quux()
 ; CHECK-NEXT:    br label [[BB47:%.*]]
 ; CHECK:       bb47:
@@ -251,20 +242,20 @@ bb:
   br i1 %c.1, label %bb16, label %bb6
 
 bb6:                                              ; preds = %bb
-  %tmp = getelementptr inbounds float, float* %arg1, i32 4
-  %tmp7 = getelementptr inbounds float, float* %arg1, i32 5
-  %tmp8 = getelementptr inbounds float, float* %arg1, i32 3
-  %tmp9 = getelementptr inbounds float, float* %arg1, i32 6
+  %tmp = getelementptr inbounds float, ptr %arg1, i32 4
+  %tmp7 = getelementptr inbounds float, ptr %arg1, i32 5
+  %tmp8 = getelementptr inbounds float, ptr %arg1, i32 3
+  %tmp9 = getelementptr inbounds float, ptr %arg1, i32 6
   %tmp10 = extractelement <2 x float> %arg, i32 0
   %tmp11 = fmul float %tmp10, 0.000000e+00
-  store float %tmp11, float* %tmp, align 4
+  store float %tmp11, ptr %tmp, align 4
   %tmp12 = extractelement <2 x float> %arg, i32 1
   %tmp13 = fmul float %tmp12, 0.000000e+00
-  store float %tmp13, float* %tmp7, align 4
+  store float %tmp13, ptr %tmp7, align 4
   %tmp14 = fmul float %arg2, 0.000000e+00
-  store float %tmp14, float* %tmp8, align 4
+  store float %tmp14, ptr %tmp8, align 4
   %tmp15 = fmul float 0.000000e+00, 0.000000e+00
-  store float %tmp15, float* %tmp9, align 4
+  store float %tmp15, ptr %tmp9, align 4
   ret void
 
 bb16:                                             ; preds = %bb
@@ -286,44 +277,42 @@ bb21:                                             ; preds = %bb20
   br label %bb22
 
 bb22:                                             ; preds = %bb22, %bb21
-  %tmp23 = getelementptr inbounds float, float* %arg4, i32 0
-  %tmp24 = getelementptr float, float* %tmp23, i64 7
+  %tmp24 = getelementptr float, ptr %arg4, i64 7
   br i1 %c.2, label %bb25, label %bb22
 
 bb25:                                             ; preds = %bb22
-  %tmp26 = getelementptr float, float* %tmp23, i64 6
-  store float 0.000000e+00, float* %tmp24, align 4
-  %tmp27 = load float, float* %arg5, align 4
-  %tmp28 = getelementptr float, float* %tmp23, i64 5
+  %tmp26 = getelementptr float, ptr %arg4, i64 6
+  store float 0.000000e+00, ptr %tmp24, align 4
+  %tmp27 = load float, ptr %arg5, align 4
+  %tmp28 = getelementptr float, ptr %arg4, i64 5
   %tmp29 = fadd float 0.000000e+00, 0.000000e+00
-  store float 0.000000e+00, float* %tmp26, align 4
-  %tmp30 = getelementptr float, float* %tmp23, i64 4
-  store float 0.000000e+00, float* %tmp28, align 4
+  store float 0.000000e+00, ptr %tmp26, align 4
+  %tmp30 = getelementptr float, ptr %arg4, i64 4
+  store float 0.000000e+00, ptr %tmp28, align 4
   %tmp31 = fadd float 0.000000e+00, 0.000000e+00
-  store float 0.000000e+00, float* %tmp30, align 4
-  %tmp32 = getelementptr inbounds float, float* %arg4, i32 0
+  store float 0.000000e+00, ptr %tmp30, align 4
   br label %bb33
 
 bb33:                                             ; preds = %bb25
   br label %bb34
 
 bb34:                                             ; preds = %bb33
-  %tmp35 = getelementptr float, float* %tmp32, i64 3
-  %tmp36 = getelementptr float, float* %tmp32, i64 2
-  %tmp37 = load float, float* %tmp35, align 4
+  %tmp35 = getelementptr float, ptr %arg4, i64 3
+  %tmp36 = getelementptr float, ptr %arg4, i64 2
+  %tmp37 = load float, ptr %tmp35, align 4
   %tmp38 = fadd float 0.000000e+00, %tmp37
-  store float %tmp38, float* %tmp35, align 4
-  %tmp39 = getelementptr float, float* %tmp32, i64 1
-  %tmp40 = load float, float* %tmp36, align 4
+  store float %tmp38, ptr %tmp35, align 4
+  %tmp39 = getelementptr float, ptr %arg4, i64 1
+  %tmp40 = load float, ptr %tmp36, align 4
   %tmp41 = fadd float 0.000000e+00, %tmp40
-  store float %tmp41, float* %tmp36, align 4
-  %tmp42 = load float, float* %tmp39, align 4
+  store float %tmp41, ptr %tmp36, align 4
+  %tmp42 = load float, ptr %tmp39, align 4
   %tmp43 = fadd float 0.000000e+00, %tmp42
-  store float %tmp43, float* %tmp39, align 4
-  %tmp44 = load float, float* %arg3, align 4
-  %tmp45 = load float, float* %tmp32, align 4
+  store float %tmp43, ptr %tmp39, align 4
+  %tmp44 = load float, ptr %arg3, align 4
+  %tmp45 = load float, ptr %arg4, align 4
   %tmp46 = fadd float 0.000000e+00, %tmp45
-  store float %tmp46, float* %tmp32, align 4
+  store float %tmp46, ptr %arg4, align 4
   call void @quux()
   br label %bb47
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll b/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll
index 4bf0f7038a089..c4bdfa804868e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll
@@ -4,60 +4,51 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-define void @test1(double* %a, double* %b, double* %c) {
+define void @test1(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[B:%.*]], align 8, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]], !fpmath !4
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[C:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT:    store <2 x double> [[TMP4]], ptr [[C:%.*]], align 8, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %i0 = load double, double* %a, align 8, !tbaa !4
-  %i1 = load double, double* %b, align 8, !tbaa !4
+  %i0 = load double, ptr %a, align 8, !tbaa !4
+  %i1 = load double, ptr %b, align 8, !tbaa !4
   %mul = fmul double %i0, %i1, !fpmath !0
-  %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double, double* %arrayidx3, align 8, !tbaa !4
-  %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double, double* %arrayidx4, align 8, !tbaa !4
+  %arrayidx3 = getelementptr inbounds double, ptr %a, i64 1
+  %i3 = load double, ptr %arrayidx3, align 8, !tbaa !4
+  %arrayidx4 = getelementptr inbounds double, ptr %b, i64 1
+  %i4 = load double, ptr %arrayidx4, align 8, !tbaa !4
   %mul5 = fmul double %i3, %i4, !fpmath !0
-  store double %mul, double* %c, align 8, !tbaa !4
-  %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
-  store double %mul5, double* %arrayidx5, align 8, !tbaa !4
+  store double %mul, ptr %c, align 8, !tbaa !4
+  %arrayidx5 = getelementptr inbounds double, ptr %c, i64 1
+  store double %mul5, ptr %arrayidx5, align 8, !tbaa !4
   ret void
 }
 
-define void @test2(double* %a, double* %b, i8* %e) {
+define void @test2(ptr %a, ptr %b, ptr %e) {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[C:%.*]] = bitcast i8* [[E:%.*]] to double*
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[B:%.*]], align 8, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]], !fpmath !5
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[C]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT:    store <2 x double> [[TMP4]], ptr [[E:%.*]], align 8, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %i0 = load double, double* %a, align 8, !tbaa !4
-  %i1 = load double, double* %b, align 8, !tbaa !4
+  %i0 = load double, ptr %a, align 8, !tbaa !4
+  %i1 = load double, ptr %b, align 8, !tbaa !4
   %mul = fmul double %i0, %i1, !fpmath !1
-  %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double, double* %arrayidx3, align 8, !tbaa !4
-  %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double, double* %arrayidx4, align 8, !tbaa !4
+  %arrayidx3 = getelementptr inbounds double, ptr %a, i64 1
+  %i3 = load double, ptr %arrayidx3, align 8, !tbaa !4
+  %arrayidx4 = getelementptr inbounds double, ptr %b, i64 1
+  %i4 = load double, ptr %arrayidx4, align 8, !tbaa !4
   %mul5 = fmul double %i3, %i4, !fpmath !1
-  %c = bitcast i8* %e to double*
-  store double %mul, double* %c, align 8, !tbaa !4
-  %carrayidx5 = getelementptr inbounds i8, i8* %e, i64 8
-  %arrayidx5 = bitcast i8* %carrayidx5 to double*
-  store double %mul5, double* %arrayidx5, align 8, !tbaa !4
+  store double %mul, ptr %e, align 8, !tbaa !4
+  %carrayidx5 = getelementptr inbounds i8, ptr %e, i64 8
+  store double %mul5, ptr %carrayidx5, align 8, !tbaa !4
   ret void
 }
 

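Before the next file, it is worth spelling out the one mechanical pattern these hunks repeat: with typed pointers, each vectorized access needed an explicit bitcast of the scalar pointer to a vector pointer type, while with opaque pointers the value type on the load or store carries that information and the bitcast vanishes. A minimal before/after sketch (the names %p, %vp, %v are illustrative, not taken from any test here):

  ; typed pointers
  %vp = bitcast double* %p to <2 x double>*
  %v = load <2 x double>, <2 x double>* %vp, align 8

  ; opaque pointers
  %v = load <2 x double>, ptr %p, align 8

Stores follow the same shape, and zero-offset geps such as getelementptr i32, i32* %base, i32 0 fold away entirely, which is why several hunks below drop their %gep*.0 definitions and address the base ptr directly.
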
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll b/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll
index d3c7a519d76b0..6cfe9af1d4911 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll
@@ -12,7 +12,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; When computing minimum sizes, if we can prove the sign bit is zero, we can
 ; zero-extend the roots back to their original sizes.
 ;
-define i8 @PR31243_zext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, i8* %ptr) {
+define i8 @PR31243_zext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) {
 ; CHECK-LABEL: @PR31243_zext(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0
@@ -20,12 +20,12 @@ define i8 @PR31243_zext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, i8* %ptr) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i8> [[TMP1]], <i8 1, i8 1>
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP3]] to i64
-; CHECK-NEXT:    [[TMP_4:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP_4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i64 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = zext i8 [[TMP5]] to i64
-; CHECK-NEXT:    [[TMP_5:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 [[TMP6]]
-; CHECK-NEXT:    [[TMP_6:%.*]] = load i8, i8* [[TMP_4]], align 1
-; CHECK-NEXT:    [[TMP_7:%.*]] = load i8, i8* [[TMP_5]], align 1
+; CHECK-NEXT:    [[TMP_5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP_6:%.*]] = load i8, ptr [[TMP_4]], align 1
+; CHECK-NEXT:    [[TMP_7:%.*]] = load i8, ptr [[TMP_5]], align 1
 ; CHECK-NEXT:    [[TMP_8:%.*]] = add i8 [[TMP_6]], [[TMP_7]]
 ; CHECK-NEXT:    ret i8 [[TMP_8]]
 ;
@@ -34,10 +34,10 @@ entry:
   %tmp_1 = zext i8 %v1 to i32
   %tmp_2 = or i32 %tmp_0, 1
   %tmp_3 = or i32 %tmp_1, 1
-  %tmp_4 = getelementptr inbounds i8, i8* %ptr, i32 %tmp_2
-  %tmp_5 = getelementptr inbounds i8, i8* %ptr, i32 %tmp_3
-  %tmp_6 = load i8, i8* %tmp_4
-  %tmp_7 = load i8, i8* %tmp_5
+  %tmp_4 = getelementptr inbounds i8, ptr %ptr, i32 %tmp_2
+  %tmp_5 = getelementptr inbounds i8, ptr %ptr, i32 %tmp_3
+  %tmp_6 = load i8, ptr %tmp_4
+  %tmp_7 = load i8, ptr %tmp_5
   %tmp_8 = add i8 %tmp_6, %tmp_7
   ret i8 %tmp_8
 }
@@ -55,17 +55,17 @@ entry:
 ;        optimization, we make the proposed smaller type (i8) larger (i16) to
 ;        ensure correctness.
 ;
-define i8 @PR31243_sext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, i8* %ptr) {
+define i8 @PR31243_sext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) {
 ; SSE-LABEL: @PR31243_sext(
 ; SSE-NEXT:  entry:
 ; SSE-NEXT:    [[TMP0:%.*]] = or i8 [[V0:%.*]], 1
 ; SSE-NEXT:    [[TMP1:%.*]] = or i8 [[V1:%.*]], 1
 ; SSE-NEXT:    [[TMP2:%.*]] = sext i8 [[TMP0]] to i64
-; SSE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[TMP2]]
+; SSE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP2]]
 ; SSE-NEXT:    [[TMP3:%.*]] = sext i8 [[TMP1]] to i64
-; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 [[TMP3]]
-; SSE-NEXT:    [[TMP6:%.*]] = load i8, i8* [[TMP4]], align 1
-; SSE-NEXT:    [[TMP7:%.*]] = load i8, i8* [[TMP5]], align 1
+; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP3]]
+; SSE-NEXT:    [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; SSE-NEXT:    [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
 ; SSE-NEXT:    [[TMP8:%.*]] = add i8 [[TMP6]], [[TMP7]]
 ; SSE-NEXT:    ret i8 [[TMP8]]
 ;
@@ -77,12 +77,12 @@ define i8 @PR31243_sext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, i8* %ptr) {
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i16>
 ; AVX-NEXT:    [[TMP4:%.*]] = extractelement <2 x i16> [[TMP3]], i64 0
 ; AVX-NEXT:    [[TMP5:%.*]] = sext i16 [[TMP4]] to i64
-; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[TMP5]]
+; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP5]]
 ; AVX-NEXT:    [[TMP6:%.*]] = extractelement <2 x i16> [[TMP3]], i64 1
 ; AVX-NEXT:    [[TMP7:%.*]] = sext i16 [[TMP6]] to i64
-; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 [[TMP7]]
-; AVX-NEXT:    [[TMP6:%.*]] = load i8, i8* [[TMP4]], align 1
-; AVX-NEXT:    [[TMP7:%.*]] = load i8, i8* [[TMP5]], align 1
+; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP7]]
+; AVX-NEXT:    [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; AVX-NEXT:    [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
 ; AVX-NEXT:    [[TMP8:%.*]] = add i8 [[TMP6]], [[TMP7]]
 ; AVX-NEXT:    ret i8 [[TMP8]]
 ;
@@ -91,10 +91,10 @@ entry:
   %tmp1 = sext i8 %v1 to i32
   %tmp2 = or i32 %tmp0, 1
   %tmp3 = or i32 %tmp1, 1
-  %tmp4 = getelementptr inbounds i8, i8* %ptr, i32 %tmp2
-  %tmp5 = getelementptr inbounds i8, i8* %ptr, i32 %tmp3
-  %tmp6 = load i8, i8* %tmp4
-  %tmp7 = load i8, i8* %tmp5
+  %tmp4 = getelementptr inbounds i8, ptr %ptr, i32 %tmp2
+  %tmp5 = getelementptr inbounds i8, ptr %ptr, i32 %tmp3
+  %tmp6 = load i8, ptr %tmp4
+  %tmp7 = load i8, ptr %tmp5
   %tmp8 = add i8 %tmp6, %tmp7
   ret i8 %tmp8
 }

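A worked lane makes the bit-width reasoning in the comments above concrete (the value 200 is chosen purely for illustration): in PR31243_zext each lane is zext i8 to i32 followed by or with 1, so bits 8-31 are provably zero. For v0 = 200, or (zext 200), 1 gives 201 in i32, and doing the or directly in i8 (0xC8 | 0x01 = 0xC9) then zero-extending also yields 201. That is why the CHECK lines accept the narrow form, roughly (a sketch; %vals, %narrow, %lane0, %idx0 are illustrative names):

  ; narrow lane math, widened only at the gep index
  %narrow = or <2 x i8> %vals, <i8 1, i8 1>
  %lane0 = extractelement <2 x i8> %narrow, i64 0
  %idx0 = zext i8 %lane0 to i64

In PR31243_sext the lanes start from sext, the high bits are not known zero, and the AVX checks show the vectorizer settling on an i16 intermediate instead of i8, as the partially quoted comment in that test explains.
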
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll
index 1029713f6a3e6..c9e821f023266 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll
@@ -12,48 +12,46 @@ target triple = "x86_64-apple-macosx10.7.0"
 ;  A[4] += n * 5 + 11;
 ;}
 
-define i32 @foo(i32* nocapture %A, i32 %n) {
+define i32 @foo(ptr nocapture %A, i32 %n) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul nsw i32 [[N:%.*]], 5
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[SHUFFLE]], <i32 7, i32 8, i32 9, i32 10>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[A]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr [[A]], align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = add nsw i32 [[TMP1]], 11
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4
-; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 4
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
 ; CHECK-NEXT:    [[TMP11:%.*]] = add nsw i32 [[TMP8]], [[TMP10]]
-; CHECK-NEXT:    store i32 [[TMP11]], i32* [[TMP9]], align 4
+; CHECK-NEXT:    store i32 [[TMP11]], ptr [[TMP9]], align 4
 ; CHECK-NEXT:    ret i32 undef
 ;
   %1 = mul nsw i32 %n, 5
   %2 = add nsw i32 %1, 7
-  %3 = load i32, i32* %A, align 4
+  %3 = load i32, ptr %A, align 4
   %4 = add nsw i32 %2, %3
-  store i32 %4, i32* %A, align 4
+  store i32 %4, ptr %A, align 4
   %5 = add nsw i32 %1, 8
-  %6 = getelementptr inbounds i32, i32* %A, i64 1
-  %7 = load i32, i32* %6, align 4
+  %6 = getelementptr inbounds i32, ptr %A, i64 1
+  %7 = load i32, ptr %6, align 4
   %8 = add nsw i32 %5, %7
-  store i32 %8, i32* %6, align 4
+  store i32 %8, ptr %6, align 4
   %9 = add nsw i32 %1, 9
-  %10 = getelementptr inbounds i32, i32* %A, i64 2
-  %11 = load i32, i32* %10, align 4
+  %10 = getelementptr inbounds i32, ptr %A, i64 2
+  %11 = load i32, ptr %10, align 4
   %12 = add nsw i32 %9, %11
-  store i32 %12, i32* %10, align 4
+  store i32 %12, ptr %10, align 4
   %13 = add nsw i32 %1, 10
-  %14 = getelementptr inbounds i32, i32* %A, i64 3
-  %15 = load i32, i32* %14, align 4
+  %14 = getelementptr inbounds i32, ptr %A, i64 3
+  %15 = load i32, ptr %14, align 4
   %16 = add nsw i32 %13, %15
-  store i32 %16, i32* %14, align 4
+  store i32 %16, ptr %14, align 4
   %17 = add nsw i32 %1, 11
-  %18 = getelementptr inbounds i32, i32* %A, i64 4
-  %19 = load i32, i32* %18, align 4
+  %18 = getelementptr inbounds i32, ptr %A, i64 4
+  %19 = load i32, ptr %18, align 4
   %20 = add nsw i32 %17, %19
-  store i32 %20, i32* %18, align 4
+  store i32 %20, ptr %18, align 4
   ret i32 undef
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
index 2eae8c2d9605a..89051c7aba42c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
@@ -1,17 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=slp-vectorizer -slp-threshold=-200 -mtriple=x86_64-unknown-linux -mcpu=core-avx2 -S | FileCheck %s
 
-define void @test_add_sdiv(i32 *%arr1, i32 *%arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
+define void @test_add_sdiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
 ; CHECK-LABEL: @test_add_sdiv(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP1_0:%.*]] = getelementptr i32, i32* [[ARR1:%.*]], i32 0
-; CHECK-NEXT:    [[GEP1_2:%.*]] = getelementptr i32, i32* [[ARR1]], i32 2
-; CHECK-NEXT:    [[GEP1_3:%.*]] = getelementptr i32, i32* [[ARR1]], i32 3
-; CHECK-NEXT:    [[GEP2_0:%.*]] = getelementptr i32, i32* [[ARR2:%.*]], i32 0
-; CHECK-NEXT:    [[GEP2_2:%.*]] = getelementptr i32, i32* [[ARR2]], i32 2
-; CHECK-NEXT:    [[GEP2_3:%.*]] = getelementptr i32, i32* [[ARR2]], i32 3
-; CHECK-NEXT:    [[V2:%.*]] = load i32, i32* [[GEP1_2]], align 4
-; CHECK-NEXT:    [[V3:%.*]] = load i32, i32* [[GEP1_3]], align 4
+; CHECK-NEXT:    [[GEP1_2:%.*]] = getelementptr i32, ptr [[ARR1:%.*]], i32 2
+; CHECK-NEXT:    [[GEP1_3:%.*]] = getelementptr i32, ptr [[ARR1]], i32 3
+; CHECK-NEXT:    [[GEP2_2:%.*]] = getelementptr i32, ptr [[ARR2:%.*]], i32 2
+; CHECK-NEXT:    [[GEP2_3:%.*]] = getelementptr i32, ptr [[ARR2]], i32 3
+; CHECK-NEXT:    [[V2:%.*]] = load i32, ptr [[GEP1_2]], align 4
+; CHECK-NEXT:    [[V3:%.*]] = load i32, ptr [[GEP1_3]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[A0:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[A1:%.*]], i32 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <2 x i32> [[TMP1]], <i32 1146, i32 146>
@@ -19,28 +17,24 @@ define void @test_add_sdiv(i32 *%arr1, i32 *%arr2, i32 %a0, i32 %a1, i32 %a2, i3
 ; CHECK-NEXT:    [[Y3:%.*]] = add nsw i32 [[A3:%.*]], 0
 ; CHECK-NEXT:    [[RES2:%.*]] = sdiv i32 [[V2]], [[Y2]]
 ; CHECK-NEXT:    [[RES3:%.*]] = add nsw i32 [[V3]], [[Y3]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[GEP1_0]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[ARR1]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i32> [[TMP4]], [[TMP2]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[GEP2_0]] to <2 x i32>*
-; CHECK-NEXT:    store <2 x i32> [[TMP5]], <2 x i32>* [[TMP6]], align 4
-; CHECK-NEXT:    store i32 [[RES2]], i32* [[GEP2_2]], align 4
-; CHECK-NEXT:    store i32 [[RES3]], i32* [[GEP2_3]], align 4
+; CHECK-NEXT:    store <2 x i32> [[TMP5]], ptr [[ARR2]], align 4
+; CHECK-NEXT:    store i32 [[RES2]], ptr [[GEP2_2]], align 4
+; CHECK-NEXT:    store i32 [[RES3]], ptr [[GEP2_3]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %gep1.0 = getelementptr i32, i32* %arr1, i32 0
-  %gep1.1 = getelementptr i32, i32* %arr1, i32 1
-  %gep1.2 = getelementptr i32, i32* %arr1, i32 2
-  %gep1.3 = getelementptr i32, i32* %arr1, i32 3
-  %gep2.0 = getelementptr i32, i32* %arr2, i32 0
-  %gep2.1 = getelementptr i32, i32* %arr2, i32 1
-  %gep2.2 = getelementptr i32, i32* %arr2, i32 2
-  %gep2.3 = getelementptr i32, i32* %arr2, i32 3
-  %v0 = load i32, i32* %gep1.0
-  %v1 = load i32, i32* %gep1.1
-  %v2 = load i32, i32* %gep1.2
-  %v3 = load i32, i32* %gep1.3
+  %gep1.1 = getelementptr i32, ptr %arr1, i32 1
+  %gep1.2 = getelementptr i32, ptr %arr1, i32 2
+  %gep1.3 = getelementptr i32, ptr %arr1, i32 3
+  %gep2.1 = getelementptr i32, ptr %arr2, i32 1
+  %gep2.2 = getelementptr i32, ptr %arr2, i32 2
+  %gep2.3 = getelementptr i32, ptr %arr2, i32 3
+  %v0 = load i32, ptr %arr1
+  %v1 = load i32, ptr %gep1.1
+  %v2 = load i32, ptr %gep1.2
+  %v3 = load i32, ptr %gep1.3
   %y0 = add nsw i32 %a0, 1146
   %y1 = add nsw i32 %a1, 146
   %y2 = add nsw i32 %a2, 42
@@ -55,29 +49,27 @@ entry:
   ;; would be illegal.
   %res2 = sdiv i32 %v2, %y2
   %res3 = add nsw i32 %v3, %y3
-  store i32 %res0, i32* %gep2.0
-  store i32 %res1, i32* %gep2.1
-  store i32 %res2, i32* %gep2.2
-  store i32 %res3, i32* %gep2.3
+  store i32 %res0, ptr %arr2
+  store i32 %res1, ptr %gep2.1
+  store i32 %res2, ptr %gep2.2
+  store i32 %res3, ptr %gep2.3
   ret void
 }
 
 ;; Similar test, but now div/rem is main opcode and not the alternate one. Same issue.
-define void @test_urem_add(i32 *%arr1, i32 *%arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
+define void @test_urem_add(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
 ; CHECK-LABEL: @test_urem_add(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP1_0:%.*]] = getelementptr i32, i32* [[ARR1:%.*]], i32 0
-; CHECK-NEXT:    [[GEP1_1:%.*]] = getelementptr i32, i32* [[ARR1]], i32 1
-; CHECK-NEXT:    [[GEP1_2:%.*]] = getelementptr i32, i32* [[ARR1]], i32 2
-; CHECK-NEXT:    [[GEP1_3:%.*]] = getelementptr i32, i32* [[ARR1]], i32 3
-; CHECK-NEXT:    [[GEP2_0:%.*]] = getelementptr i32, i32* [[ARR2:%.*]], i32 0
-; CHECK-NEXT:    [[GEP2_1:%.*]] = getelementptr i32, i32* [[ARR2]], i32 1
-; CHECK-NEXT:    [[GEP2_2:%.*]] = getelementptr i32, i32* [[ARR2]], i32 2
-; CHECK-NEXT:    [[GEP2_3:%.*]] = getelementptr i32, i32* [[ARR2]], i32 3
-; CHECK-NEXT:    [[V0:%.*]] = load i32, i32* [[GEP1_0]], align 4
-; CHECK-NEXT:    [[V1:%.*]] = load i32, i32* [[GEP1_1]], align 4
-; CHECK-NEXT:    [[V2:%.*]] = load i32, i32* [[GEP1_2]], align 4
-; CHECK-NEXT:    [[V3:%.*]] = load i32, i32* [[GEP1_3]], align 4
+; CHECK-NEXT:    [[GEP1_1:%.*]] = getelementptr i32, ptr [[ARR1:%.*]], i32 1
+; CHECK-NEXT:    [[GEP1_2:%.*]] = getelementptr i32, ptr [[ARR1]], i32 2
+; CHECK-NEXT:    [[GEP1_3:%.*]] = getelementptr i32, ptr [[ARR1]], i32 3
+; CHECK-NEXT:    [[GEP2_1:%.*]] = getelementptr i32, ptr [[ARR2:%.*]], i32 1
+; CHECK-NEXT:    [[GEP2_2:%.*]] = getelementptr i32, ptr [[ARR2]], i32 2
+; CHECK-NEXT:    [[GEP2_3:%.*]] = getelementptr i32, ptr [[ARR2]], i32 3
+; CHECK-NEXT:    [[V0:%.*]] = load i32, ptr [[ARR1]], align 4
+; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[GEP1_1]], align 4
+; CHECK-NEXT:    [[V2:%.*]] = load i32, ptr [[GEP1_2]], align 4
+; CHECK-NEXT:    [[V3:%.*]] = load i32, ptr [[GEP1_3]], align 4
 ; CHECK-NEXT:    [[Y0:%.*]] = add nsw i32 [[A0:%.*]], 1146
 ; CHECK-NEXT:    [[Y1:%.*]] = add nsw i32 [[A1:%.*]], 146
 ; CHECK-NEXT:    [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
@@ -86,25 +78,23 @@ define void @test_urem_add(i32 *%arr1, i32 *%arr2, i32 %a0, i32 %a1, i32 %a2, i3
 ; CHECK-NEXT:    [[RES1:%.*]] = urem i32 [[V1]], [[Y1]]
 ; CHECK-NEXT:    [[RES2:%.*]] = urem i32 [[V2]], [[Y2]]
 ; CHECK-NEXT:    [[RES3:%.*]] = add nsw i32 [[V3]], [[Y3]]
-; CHECK-NEXT:    store i32 [[RES0]], i32* [[GEP2_0]], align 4
-; CHECK-NEXT:    store i32 [[RES1]], i32* [[GEP2_1]], align 4
-; CHECK-NEXT:    store i32 [[RES2]], i32* [[GEP2_2]], align 4
-; CHECK-NEXT:    store i32 [[RES3]], i32* [[GEP2_3]], align 4
+; CHECK-NEXT:    store i32 [[RES0]], ptr [[ARR2]], align 4
+; CHECK-NEXT:    store i32 [[RES1]], ptr [[GEP2_1]], align 4
+; CHECK-NEXT:    store i32 [[RES2]], ptr [[GEP2_2]], align 4
+; CHECK-NEXT:    store i32 [[RES3]], ptr [[GEP2_3]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %gep1.0 = getelementptr i32, i32* %arr1, i32 0
-  %gep1.1 = getelementptr i32, i32* %arr1, i32 1
-  %gep1.2 = getelementptr i32, i32* %arr1, i32 2
-  %gep1.3 = getelementptr i32, i32* %arr1, i32 3
-  %gep2.0 = getelementptr i32, i32* %arr2, i32 0
-  %gep2.1 = getelementptr i32, i32* %arr2, i32 1
-  %gep2.2 = getelementptr i32, i32* %arr2, i32 2
-  %gep2.3 = getelementptr i32, i32* %arr2, i32 3
-  %v0 = load i32, i32* %gep1.0
-  %v1 = load i32, i32* %gep1.1
-  %v2 = load i32, i32* %gep1.2
-  %v3 = load i32, i32* %gep1.3
+  %gep1.1 = getelementptr i32, ptr %arr1, i32 1
+  %gep1.2 = getelementptr i32, ptr %arr1, i32 2
+  %gep1.3 = getelementptr i32, ptr %arr1, i32 3
+  %gep2.1 = getelementptr i32, ptr %arr2, i32 1
+  %gep2.2 = getelementptr i32, ptr %arr2, i32 2
+  %gep2.3 = getelementptr i32, ptr %arr2, i32 3
+  %v0 = load i32, ptr %arr1
+  %v1 = load i32, ptr %gep1.1
+  %v2 = load i32, ptr %gep1.2
+  %v3 = load i32, ptr %gep1.3
   %y0 = add nsw i32 %a0, 1146
   %y1 = add nsw i32 %a1, 146
   %y2 = add nsw i32 %a2, 42
@@ -119,9 +109,9 @@ entry:
   ;;   %result = shuffle %vurem, %vadd, <mask>
   ;; would be illegal.
   %res3 = add nsw i32 %v3, %y3
-  store i32 %res0, i32* %gep2.0
-  store i32 %res1, i32* %gep2.1
-  store i32 %res2, i32* %gep2.2
-  store i32 %res3, i32* %gep2.3
+  store i32 %res0, ptr %arr2
+  store i32 %res1, ptr %gep2.1
+  store i32 %res2, ptr %gep2.2
+  store i32 %res3, ptr %gep2.3
   ret void
 }

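Both functions in that file pin down the same hazard, which the partially visible comments describe: an alternate-opcode bundle mixing add with sdiv or urem would have to execute the division on every lane and then blend the wanted lanes together, roughly (a sketch of the illegal form the comments name, with illustrative operands):

  %vadd = add nsw <4 x i32> %v, %y
  %vdiv = sdiv <4 x i32> %v, %y          ; divides in all four lanes
  %res = shufflevector <4 x i32> %vadd, <4 x i32> %vdiv, <4 x i32> <i32 0, i32 1, i32 6, i32 3>

The scalar code divides only in particular lanes, and a speculated sdiv or urem lane may divide by zero, which is undefined behavior, so the CHECK lines insist the division results stay scalar.
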
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll b/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll
index 98e4839543657..32ad5fd80cf57 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll
@@ -1,178 +1,174 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=slp-vectorizer,dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
 
 ;int foo(char * restrict A, float * restrict B, float T) {
 ;  A[0] = (T * B[10] + 4.0);
 ;  A[1] = (T * B[11] + 5.0);
 ;  A[2] = (T * B[12] + 6.0);
 ;}
 
-define i32 @foo(i8* noalias nocapture %A, float* noalias nocapture %B, float %T) {
+define i32 @foo(ptr noalias nocapture %A, ptr noalias nocapture %B, float %T) {
 ; CHECK-LABEL: @foo(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 10
-; CHECK-NEXT:    [[TMP2:%.*]] = load float, float* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 10
+; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[TMP1]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul float [[TMP2]], [[T:%.*]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fpext float [[TMP3]] to double
 ; CHECK-NEXT:    [[TMP5:%.*]] = fadd double [[TMP4]], 4.000000e+00
 ; CHECK-NEXT:    [[TMP6:%.*]] = fptosi double [[TMP5]] to i8
-; CHECK-NEXT:    store i8 [[TMP6]], i8* [[A:%.*]], align 1
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[B]], i64 11
-; CHECK-NEXT:    [[TMP8:%.*]] = load float, float* [[TMP7]], align 4
+; CHECK-NEXT:    store i8 [[TMP6]], ptr [[A:%.*]], align 1
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[B]], i64 11
+; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul float [[TMP8]], [[T]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = fpext float [[TMP9]] to double
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd double [[TMP10]], 5.000000e+00
 ; CHECK-NEXT:    [[TMP12:%.*]] = fptosi double [[TMP11]] to i8
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 1
-; CHECK-NEXT:    store i8 [[TMP12]], i8* [[TMP13]], align 1
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, float* [[B]], i64 12
-; CHECK-NEXT:    [[TMP15:%.*]] = load float, float* [[TMP14]], align 4
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 1
+; CHECK-NEXT:    store i8 [[TMP12]], ptr [[TMP13]], align 1
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 12
+; CHECK-NEXT:    [[TMP15:%.*]] = load float, ptr [[TMP14]], align 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = fmul float [[TMP15]], [[T]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = fpext float [[TMP16]] to double
 ; CHECK-NEXT:    [[TMP18:%.*]] = fadd double [[TMP17]], 6.000000e+00
 ; CHECK-NEXT:    [[TMP19:%.*]] = fptosi double [[TMP18]] to i8
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 2
-; CHECK-NEXT:    store i8 [[TMP19]], i8* [[TMP20]], align 1
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 2
+; CHECK-NEXT:    store i8 [[TMP19]], ptr [[TMP20]], align 1
 ; CHECK-NEXT:    ret i32 undef
 ;
-  %1 = getelementptr inbounds float, float* %B, i64 10
-  %2 = load float, float* %1, align 4
+  %1 = getelementptr inbounds float, ptr %B, i64 10
+  %2 = load float, ptr %1, align 4
   %3 = fmul float %2, %T
   %4 = fpext float %3 to double
   %5 = fadd double %4, 4.000000e+00
   %6 = fptosi double %5 to i8
-  store i8 %6, i8* %A, align 1
-  %7 = getelementptr inbounds float, float* %B, i64 11
-  %8 = load float, float* %7, align 4
+  store i8 %6, ptr %A, align 1
+  %7 = getelementptr inbounds float, ptr %B, i64 11
+  %8 = load float, ptr %7, align 4
   %9 = fmul float %8, %T
   %10 = fpext float %9 to double
   %11 = fadd double %10, 5.000000e+00
   %12 = fptosi double %11 to i8
-  %13 = getelementptr inbounds i8, i8* %A, i64 1
-  store i8 %12, i8* %13, align 1
-  %14 = getelementptr inbounds float, float* %B, i64 12
-  %15 = load float, float* %14, align 4
+  %13 = getelementptr inbounds i8, ptr %A, i64 1
+  store i8 %12, ptr %13, align 1
+  %14 = getelementptr inbounds float, ptr %B, i64 12
+  %15 = load float, ptr %14, align 4
   %16 = fmul float %15, %T
   %17 = fpext float %16 to double
   %18 = fadd double %17, 6.000000e+00
   %19 = fptosi double %18 to i8
-  %20 = getelementptr inbounds i8, i8* %A, i64 2
-  store i8 %19, i8* %20, align 1
+  %20 = getelementptr inbounds i8, ptr %A, i64 2
+  store i8 %19, ptr %20, align 1
   ret i32 undef
 }
 
 ; PR41892
-define void @test_v4f32_v2f32_store(<4 x float> %f, float* %p){
+define void @test_v4f32_v2f32_store(<4 x float> %f, ptr %p){
 ; CHECK-LABEL: @test_v4f32_v2f32_store(
 ; CHECK-NEXT:    [[X0:%.*]] = extractelement <4 x float> [[F:%.*]], i64 0
 ; CHECK-NEXT:    [[X1:%.*]] = extractelement <4 x float> [[F]], i64 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> poison, float [[X0]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[X1]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[P:%.*]] to <2 x float>*
-; CHECK-NEXT:    store <2 x float> [[TMP2]], <2 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    store <2 x float> [[TMP2]], ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %x0 = extractelement <4 x float> %f, i64 0
   %x1 = extractelement <4 x float> %f, i64 1
-  %p1 = getelementptr inbounds float, float* %p, i64 1
-  store float %x0, float* %p, align 4
-  store float %x1, float* %p1, align 4
+  %p1 = getelementptr inbounds float, ptr %p, i64 1
+  store float %x0, ptr %p, align 4
+  store float %x1, ptr %p1, align 4
   ret void
 }
 
-define void @test_v4f32_v2f32_splat_store(<4 x float> %f, float* %p){
+define void @test_v4f32_v2f32_splat_store(<4 x float> %f, ptr %p){
 ; CHECK-LABEL: @test_v4f32_v2f32_splat_store(
 ; CHECK-NEXT:    [[X0:%.*]] = extractelement <4 x float> [[F:%.*]], i64 0
-; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
-; CHECK-NEXT:    store float [[X0]], float* [[P]], align 4
-; CHECK-NEXT:    store float [[X0]], float* [[P1]], align 4
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds float, ptr [[P:%.*]], i64 1
+; CHECK-NEXT:    store float [[X0]], ptr [[P]], align 4
+; CHECK-NEXT:    store float [[X0]], ptr [[P1]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %x0 = extractelement <4 x float> %f, i64 0
-  %p1 = getelementptr inbounds float, float* %p, i64 1
-  store float %x0, float* %p, align 4
-  store float %x0, float* %p1, align 4
+  %p1 = getelementptr inbounds float, ptr %p, i64 1
+  store float %x0, ptr %p, align 4
+  store float %x0, ptr %p1, align 4
   ret void
 }
 
-define void @test_v4f32_v3f32_store(<4 x float> %f, float* %p){
+define void @test_v4f32_v3f32_store(<4 x float> %f, ptr %p){
 ; CHECK-LABEL: @test_v4f32_v3f32_store(
 ; CHECK-NEXT:    [[X0:%.*]] = extractelement <4 x float> [[F:%.*]], i64 0
 ; CHECK-NEXT:    [[X1:%.*]] = extractelement <4 x float> [[F]], i64 1
 ; CHECK-NEXT:    [[X2:%.*]] = extractelement <4 x float> [[F]], i64 2
-; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 2
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds float, ptr [[P:%.*]], i64 2
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> poison, float [[X0]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[X1]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[P]] to <2 x float>*
-; CHECK-NEXT:    store <2 x float> [[TMP2]], <2 x float>* [[TMP3]], align 4
-; CHECK-NEXT:    store float [[X2]], float* [[P2]], align 4
+; CHECK-NEXT:    store <2 x float> [[TMP2]], ptr [[P]], align 4
+; CHECK-NEXT:    store float [[X2]], ptr [[P2]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %x0 = extractelement <4 x float> %f, i64 0
   %x1 = extractelement <4 x float> %f, i64 1
   %x2 = extractelement <4 x float> %f, i64 2
-  %p1 = getelementptr inbounds float, float* %p, i64 1
-  %p2 = getelementptr inbounds float, float* %p, i64 2
-  store float %x0, float* %p, align 4
-  store float %x1, float* %p1, align 4
-  store float %x2, float* %p2, align 4
+  %p1 = getelementptr inbounds float, ptr %p, i64 1
+  %p2 = getelementptr inbounds float, ptr %p, i64 2
+  store float %x0, ptr %p, align 4
+  store float %x1, ptr %p1, align 4
+  store float %x2, ptr %p2, align 4
   ret void
 }
 
-define void @test_v4f32_v3f32_splat_store(<4 x float> %f, float* %p){
+define void @test_v4f32_v3f32_splat_store(<4 x float> %f, ptr %p){
 ; CHECK-LABEL: @test_v4f32_v3f32_splat_store(
 ; CHECK-NEXT:    [[X0:%.*]] = extractelement <4 x float> [[F:%.*]], i64 0
-; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
-; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
-; CHECK-NEXT:    store float [[X0]], float* [[P]], align 4
-; CHECK-NEXT:    store float [[X0]], float* [[P1]], align 4
-; CHECK-NEXT:    store float [[X0]], float* [[P2]], align 4
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds float, ptr [[P:%.*]], i64 1
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 2
+; CHECK-NEXT:    store float [[X0]], ptr [[P]], align 4
+; CHECK-NEXT:    store float [[X0]], ptr [[P1]], align 4
+; CHECK-NEXT:    store float [[X0]], ptr [[P2]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %x0 = extractelement <4 x float> %f, i64 0
-  %p1 = getelementptr inbounds float, float* %p, i64 1
-  %p2 = getelementptr inbounds float, float* %p, i64 2
-  store float %x0, float* %p, align 4
-  store float %x0, float* %p1, align 4
-  store float %x0, float* %p2, align 4
+  %p1 = getelementptr inbounds float, ptr %p, i64 1
+  %p2 = getelementptr inbounds float, ptr %p, i64 2
+  store float %x0, ptr %p, align 4
+  store float %x0, ptr %p1, align 4
+  store float %x0, ptr %p2, align 4
   ret void
 }
 
-define void @test_v4f32_v4f32_store(<4 x float> %f, float* %p){
+define void @test_v4f32_v4f32_store(<4 x float> %f, ptr %p){
 ; CHECK-LABEL: @test_v4f32_v4f32_store(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[F:%.*]], <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    store <4 x float> [[F:%.*]], ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %x0 = extractelement <4 x float> %f, i64 0
   %x1 = extractelement <4 x float> %f, i64 1
   %x2 = extractelement <4 x float> %f, i64 2
   %x3 = extractelement <4 x float> %f, i64 3
-  %p1 = getelementptr inbounds float, float* %p, i64 1
-  %p2 = getelementptr inbounds float, float* %p, i64 2
-  %p3 = getelementptr inbounds float, float* %p, i64 3
-  store float %x0, float* %p, align 4
-  store float %x1, float* %p1, align 4
-  store float %x2, float* %p2, align 4
-  store float %x3, float* %p3, align 4
+  %p1 = getelementptr inbounds float, ptr %p, i64 1
+  %p2 = getelementptr inbounds float, ptr %p, i64 2
+  %p3 = getelementptr inbounds float, ptr %p, i64 3
+  store float %x0, ptr %p, align 4
+  store float %x1, ptr %p1, align 4
+  store float %x2, ptr %p2, align 4
+  store float %x3, ptr %p3, align 4
   ret void
 }
 
-define void @test_v4f32_v4f32_splat_store(<4 x float> %f, float* %p){
+define void @test_v4f32_v4f32_splat_store(<4 x float> %f, ptr %p){
 ; CHECK-LABEL: @test_v4f32_v4f32_splat_store(
 ; CHECK-NEXT:    [[X0:%.*]] = extractelement <4 x float> [[F:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[X0]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[SHUFFLE]], <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    store <4 x float> [[SHUFFLE]], ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %x0 = extractelement <4 x float> %f, i64 0
-  %p1 = getelementptr inbounds float, float* %p, i64 1
-  %p2 = getelementptr inbounds float, float* %p, i64 2
-  %p3 = getelementptr inbounds float, float* %p, i64 3
-  store float %x0, float* %p, align 4
-  store float %x0, float* %p1, align 4
-  store float %x0, float* %p2, align 4
-  store float %x0, float* %p3, align 4
+  %p1 = getelementptr inbounds float, ptr %p, i64 1
+  %p2 = getelementptr inbounds float, ptr %p, i64 2
+  %p3 = getelementptr inbounds float, ptr %p, i64 3
+  store float %x0, ptr %p, align 4
+  store float %x0, ptr %p1, align 4
+  store float %x0, ptr %p2, align 4
+  store float %x0, ptr %p3, align 4
   ret void
 }

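The operandorder.ll conversions that follow test commutative-operand reordering: when scalar lanes place their consecutive loads on opposite sides of an fadd, SLP may commute one lane so both loads land in the same vector operand, as shuffle_operands1 below checks. A minimal sketch of the commutation (%load0/%load1 come from adjacent addresses off %p; all names illustrative):

  ; as written: load on the left in lane 0, on the right in lane 1
  %s0 = fadd double %load0, %x
  %s1 = fadd double %y, %load1

  ; after commuting lane 1: one vector load plus one built-up operand
  %loads = load <2 x double>, ptr %p, align 8
  %t0 = insertelement <2 x double> poison, double %x, i64 0
  %t1 = insertelement <2 x double> %t0, double %y, i64 1
  %sum = fadd <2 x double> %loads, %t1
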
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll
index d8355a3bd915c..5bac13d87b48f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll
@@ -7,51 +7,45 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 ; Make sure we order the operands of commutative operations so that we get
 ; bigger vectorizable trees.
 
-define void @shuffle_operands1(double * noalias %from, double * noalias %to, double %v1, double %v2) {
+define void @shuffle_operands1(ptr noalias %from, ptr noalias %to, double %v1, double %v2) {
 ; CHECK-LABEL: @shuffle_operands1(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[V1:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[V2:%.*]], i64 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 4
+; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[TO:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 ; SSE2-LABEL: @shuffle_operands1(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4
+; SSE2-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
 ; SSE2-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[V1:%.*]], i64 0
 ; SSE2-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[V2:%.*]], i64 1
 ; SSE2-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP2]], [[TMP4]]
-; SSE2-NEXT:    [[TMP6:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
-; SSE2-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 4
+; SSE2-NEXT:    store <2 x double> [[TMP5]], ptr [[TO:%.*]], align 4
 ; SSE2-NEXT:    ret void
 ;
-  %from_1 = getelementptr double, double *%from, i64 1
-  %v0_1 = load double , double * %from
-  %v0_2 = load double , double * %from_1
+  %from_1 = getelementptr double, ptr %from, i64 1
+  %v0_1 = load double , ptr %from
+  %v0_2 = load double , ptr %from_1
   %v1_1 = fadd double %v0_1, %v1
   %v1_2 = fadd double %v2, %v0_2
-  %to_2 = getelementptr double, double * %to, i64 1
-  store double %v1_1, double *%to
-  store double %v1_2, double *%to_2
+  %to_2 = getelementptr double, ptr %to, i64 1
+  store double %v1_1, ptr %to
+  store double %v1_2, ptr %to_2
   ret void
 }
 
-define void @vecload_vs_broadcast(double * noalias %from, double * noalias %to, double %v1, double %v2) {
+define void @vecload_vs_broadcast(ptr noalias %from, ptr noalias %to, double %v1, double %v2) {
 ; CHECK-LABEL: @vecload_vs_broadcast(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LP:%.*]]
 ; CHECK:       lp:
 ; CHECK-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[P]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 4
+; CHECK-NEXT:    store <2 x double> [[TMP4]], ptr [[TO:%.*]], align 4
 ; CHECK-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
 ; CHECK:       ext:
 ; CHECK-NEXT:    ret void
@@ -61,13 +55,11 @@ define void @vecload_vs_broadcast(double * noalias %from, double * noalias %to,
 ; SSE2-NEXT:    br label [[LP:%.*]]
 ; SSE2:       lp:
 ; SSE2-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; SSE2-NEXT:    [[TMP0:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>*
-; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4
+; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
 ; SSE2-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[P]], i64 0
 ; SSE2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
 ; SSE2-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
-; SSE2-NEXT:    [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
-; SSE2-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 4
+; SSE2-NEXT:    store <2 x double> [[TMP4]], ptr [[TO:%.*]], align 4
 ; SSE2-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
 ; SSE2:       ext:
 ; SSE2-NEXT:    ret void
@@ -77,33 +69,31 @@ br label %lp
 
 lp:
   %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
-  %from_1 = getelementptr double, double *%from, i64 1
-  %v0_1 = load double , double * %from
-  %v0_2 = load double , double * %from_1
+  %from_1 = getelementptr double, ptr %from, i64 1
+  %v0_1 = load double , ptr %from
+  %v0_2 = load double , ptr %from_1
   %v1_1 = fadd double %v0_1, %p
   %v1_2 = fadd double %v0_1, %v0_2
-  %to_2 = getelementptr double, double * %to, i64 1
-  store double %v1_1, double *%to
-  store double %v1_2, double *%to_2
+  %to_2 = getelementptr double, ptr %to, i64 1
+  store double %v1_1, ptr %to
+  store double %v1_2, ptr %to_2
 br i1 undef, label %lp, label %ext
 
 ext:
   ret void
 }
 
-define void @vecload_vs_broadcast2(double * noalias %from, double * noalias %to, double %v1, double %v2) {
+define void @vecload_vs_broadcast2(ptr noalias %from, ptr noalias %to, double %v1, double %v2) {
 ; CHECK-LABEL: @vecload_vs_broadcast2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LP:%.*]]
 ; CHECK:       lp:
 ; CHECK-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[P]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 4
+; CHECK-NEXT:    store <2 x double> [[TMP4]], ptr [[TO:%.*]], align 4
 ; CHECK-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
 ; CHECK:       ext:
 ; CHECK-NEXT:    ret void
@@ -113,13 +103,11 @@ define void @vecload_vs_broadcast2(double * noalias %from, double * noalias %to,
 ; SSE2-NEXT:    br label [[LP:%.*]]
 ; SSE2:       lp:
 ; SSE2-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; SSE2-NEXT:    [[TMP0:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>*
-; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4
+; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
 ; SSE2-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[P]], i64 0
 ; SSE2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
 ; SSE2-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], [[TMP1]]
-; SSE2-NEXT:    [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
-; SSE2-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 4
+; SSE2-NEXT:    store <2 x double> [[TMP4]], ptr [[TO:%.*]], align 4
 ; SSE2-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
 ; SSE2:       ext:
 ; SSE2-NEXT:    ret void
@@ -129,33 +117,31 @@ br label %lp
 
 lp:
   %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
-  %from_1 = getelementptr double, double *%from, i64 1
-  %v0_1 = load double , double * %from
-  %v0_2 = load double , double * %from_1
+  %from_1 = getelementptr double, ptr %from, i64 1
+  %v0_1 = load double , ptr %from
+  %v0_2 = load double , ptr %from_1
   %v1_1 = fadd double %p, %v0_1
   %v1_2 = fadd double %v0_2, %v0_1
-  %to_2 = getelementptr double, double * %to, i64 1
-  store double %v1_1, double *%to
-  store double %v1_2, double *%to_2
+  %to_2 = getelementptr double, ptr %to, i64 1
+  store double %v1_1, ptr %to
+  store double %v1_2, ptr %to_2
 br i1 undef, label %lp, label %ext
 
 ext:
   ret void
 }
 
-define void @vecload_vs_broadcast3(double * noalias %from, double * noalias %to, double %v1, double %v2) {
+define void @vecload_vs_broadcast3(ptr noalias %from, ptr noalias %to, double %v1, double %v2) {
 ; CHECK-LABEL: @vecload_vs_broadcast3(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LP:%.*]]
 ; CHECK:       lp:
 ; CHECK-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[P]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 4
+; CHECK-NEXT:    store <2 x double> [[TMP4]], ptr [[TO:%.*]], align 4
 ; CHECK-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
 ; CHECK:       ext:
 ; CHECK-NEXT:    ret void
@@ -165,13 +151,11 @@ define void @vecload_vs_broadcast3(double * noalias %from, double * noalias %to,
 ; SSE2-NEXT:    br label [[LP:%.*]]
 ; SSE2:       lp:
 ; SSE2-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; SSE2-NEXT:    [[TMP0:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>*
-; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4
+; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
 ; SSE2-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[P]], i64 0
 ; SSE2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
 ; SSE2-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], [[TMP1]]
-; SSE2-NEXT:    [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
-; SSE2-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 4
+; SSE2-NEXT:    store <2 x double> [[TMP4]], ptr [[TO:%.*]], align 4
 ; SSE2-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
 ; SSE2:       ext:
 ; SSE2-NEXT:    ret void
@@ -181,33 +165,31 @@ br label %lp
 
 lp:
   %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
-  %from_1 = getelementptr double, double *%from, i64 1
-  %v0_1 = load double , double * %from
-  %v0_2 = load double , double * %from_1
+  %from_1 = getelementptr double, ptr %from, i64 1
+  %v0_1 = load double , ptr %from
+  %v0_2 = load double , ptr %from_1
   %v1_1 = fadd double %p, %v0_1
   %v1_2 = fadd double %v0_1, %v0_2
-  %to_2 = getelementptr double, double * %to, i64 1
-  store double %v1_1, double *%to
-  store double %v1_2, double *%to_2
+  %to_2 = getelementptr double, ptr %to, i64 1
+  store double %v1_1, ptr %to
+  store double %v1_2, ptr %to_2
 br i1 undef, label %lp, label %ext
 
 ext:
   ret void
 }
 
-define void @shuffle_nodes_match1(double * noalias %from, double * noalias %to, double %v1, double %v2) {
+define void @shuffle_nodes_match1(ptr noalias %from, ptr noalias %to, double %v1, double %v2) {
 ; CHECK-LABEL: @shuffle_nodes_match1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LP:%.*]]
 ; CHECK:       lp:
 ; CHECK-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 4
+; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4
 ; CHECK-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
 ; CHECK:       ext:
 ; CHECK-NEXT:    ret void
@@ -217,13 +199,11 @@ define void @shuffle_nodes_match1(double * noalias %from, double * noalias %to,
 ; SSE2-NEXT:    br label [[LP:%.*]]
 ; SSE2:       lp:
 ; SSE2-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; SSE2-NEXT:    [[TMP0:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>*
-; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4
+; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
 ; SSE2-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
 ; SSE2-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[SHUFFLE]]
-; SSE2-NEXT:    [[TMP4:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
-; SSE2-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 4
+; SSE2-NEXT:    store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4
 ; SSE2-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
 ; SSE2:       ext:
 ; SSE2-NEXT:    ret void
@@ -233,36 +213,35 @@ br label %lp
 
 lp:
   %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
-  %from_1 = getelementptr double, double *%from, i64 1
-  %v0_1 = load double , double * %from
-  %v0_2 = load double , double * %from_1
+  %from_1 = getelementptr double, ptr %from, i64 1
+  %v0_1 = load double , ptr %from
+  %v0_2 = load double , ptr %from_1
   %v1_1 = fadd double %v0_2, %v0_1
   %v1_2 = fadd double %p, %v0_1
-  %to_2 = getelementptr double, double * %to, i64 1
-  store double %v1_1, double *%to
-  store double %v1_2, double *%to_2
+  %to_2 = getelementptr double, ptr %to, i64 1
+  store double %v1_1, ptr %to
+  store double %v1_2, ptr %to_2
 br i1 undef, label %lp, label %ext
 
 ext:
   ret void
 }
 
-define void @vecload_vs_broadcast4(double * noalias %from, double * noalias %to, double %v1, double %v2) {
+define void @vecload_vs_broadcast4(ptr noalias %from, ptr noalias %to, double %v1, double %v2) {
 ; CHECK-LABEL: @vecload_vs_broadcast4(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LP:%.*]]
 ; CHECK:       lp:
 ; CHECK-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[FROM_1:%.*]] = getelementptr double, double* [[FROM:%.*]], i32 1
-; CHECK-NEXT:    [[V0_1:%.*]] = load double, double* [[FROM]], align 4
-; CHECK-NEXT:    [[V0_2:%.*]] = load double, double* [[FROM_1]], align 4
+; CHECK-NEXT:    [[FROM_1:%.*]] = getelementptr double, ptr [[FROM:%.*]], i32 1
+; CHECK-NEXT:    [[V0_1:%.*]] = load double, ptr [[FROM]], align 4
+; CHECK-NEXT:    [[V0_2:%.*]] = load double, ptr [[FROM_1]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V0_2]], i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P]], i64 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V0_1]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 4
+; CHECK-NEXT:    store <2 x double> [[TMP4]], ptr [[TO:%.*]], align 4
 ; CHECK-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
 ; CHECK:       ext:
 ; CHECK-NEXT:    ret void
@@ -272,13 +251,11 @@ define void @vecload_vs_broadcast4(double * noalias %from, double * noalias %to,
 ; SSE2-NEXT:    br label [[LP:%.*]]
 ; SSE2:       lp:
 ; SSE2-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; SSE2-NEXT:    [[TMP0:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>*
-; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4
+; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
 ; SSE2-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
 ; SSE2-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[SHUFFLE]]
-; SSE2-NEXT:    [[TMP4:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
-; SSE2-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 4
+; SSE2-NEXT:    store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4
 ; SSE2-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
 ; SSE2:       ext:
 ; SSE2-NEXT:    ret void
@@ -288,14 +265,14 @@ br label %lp
 
 lp:
   %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
-  %from_1 = getelementptr double, double *%from, i64 1
-  %v0_1 = load double , double * %from
-  %v0_2 = load double , double * %from_1
+  %from_1 = getelementptr double, ptr %from, i64 1
+  %v0_1 = load double , ptr %from
+  %v0_2 = load double , ptr %from_1
   %v1_1 = fadd double %v0_1, %v0_2
   %v1_2 = fadd double %p, %v0_1
-  %to_2 = getelementptr double, double * %to, i64 1
-  store double %v1_1, double *%to
-  store double %v1_2, double *%to_2
+  %to_2 = getelementptr double, ptr %to, i64 1
+  store double %v1_1, ptr %to
+  store double %v1_2, ptr %to_2
 br i1 undef, label %lp, label %ext
 
 ext:
@@ -303,22 +280,21 @@ ext:
 }
 
 
-define void @shuffle_nodes_match2(double * noalias %from, double * noalias %to, double %v1, double %v2) {
+define void @shuffle_nodes_match2(ptr noalias %from, ptr noalias %to, double %v1, double %v2) {
 ; CHECK-LABEL: @shuffle_nodes_match2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LP:%.*]]
 ; CHECK:       lp:
 ; CHECK-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[FROM_1:%.*]] = getelementptr double, double* [[FROM:%.*]], i32 1
-; CHECK-NEXT:    [[V0_1:%.*]] = load double, double* [[FROM]], align 4
-; CHECK-NEXT:    [[V0_2:%.*]] = load double, double* [[FROM_1]], align 4
+; CHECK-NEXT:    [[FROM_1:%.*]] = getelementptr double, ptr [[FROM:%.*]], i32 1
+; CHECK-NEXT:    [[V0_1:%.*]] = load double, ptr [[FROM]], align 4
+; CHECK-NEXT:    [[V0_2:%.*]] = load double, ptr [[FROM_1]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V0_1]], i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V0_2]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[P]], i64 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 4
+; CHECK-NEXT:    store <2 x double> [[TMP4]], ptr [[TO:%.*]], align 4
 ; CHECK-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
 ; CHECK:       ext:
 ; CHECK-NEXT:    ret void
@@ -328,13 +304,11 @@ define void @shuffle_nodes_match2(double * noalias %from, double * noalias %to,
 ; SSE2-NEXT:    br label [[LP:%.*]]
 ; SSE2:       lp:
 ; SSE2-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; SSE2-NEXT:    [[TMP0:%.*]] = bitcast double* [[FROM:%.*]] to <2 x double>*
-; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4
+; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
 ; SSE2-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
 ; SSE2-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[SHUFFLE]], [[TMP2]]
-; SSE2-NEXT:    [[TMP4:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>*
-; SSE2-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 4
+; SSE2-NEXT:    store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4
 ; SSE2-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
 ; SSE2:       ext:
 ; SSE2-NEXT:    ret void
@@ -344,14 +318,14 @@ br label %lp
 
 lp:
   %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
-  %from_1 = getelementptr double, double *%from, i64 1
-  %v0_1 = load double , double * %from
-  %v0_2 = load double , double * %from_1
+  %from_1 = getelementptr double, ptr %from, i64 1
+  %v0_1 = load double , ptr %from
+  %v0_2 = load double , ptr %from_1
   %v1_1 = fadd double %v0_1, %v0_2
   %v1_2 = fadd double %v0_1, %p
-  %to_2 = getelementptr double, double * %to, i64 1
-  store double %v1_1, double *%to
-  store double %v1_2, double *%to_2
+  %to_2 = getelementptr double, ptr %to, i64 1
+  store double %v1_1, ptr %to
+  store double %v1_2, ptr %to_2
 br i1 undef, label %lp, label %ext
 
 ext:
@@ -368,33 +342,31 @@ define void @good_load_order() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
 ; CHECK:       for.cond1.preheader:
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i32 0, i32 0), align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr @a, align 16
 ; CHECK-NEXT:    br label [[FOR_BODY3:%.*]]
 ; CHECK:       for.body3:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi float [ [[TMP0]], [[FOR_COND1_PREHEADER]] ], [ [[TMP14:%.*]], [[FOR_BODY3]] ]
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY3]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32000 x float], [32000 x float]* @a, i32 0, i32 [[TMP3]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i32 0, i32 [[TMP3]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [32000 x float], [32000 x float]* @a, i32 0, i32 [[TMP4]]
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i32 0, i32 [[TMP4]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[INDVARS_IV]] to i32
 ; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[TMP5]], 4
-; CHECK-NEXT:    [[ARRAYIDX31:%.*]] = getelementptr inbounds [32000 x float], [32000 x float]* @a, i32 0, i32 [[TMP6]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>*
-; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[TMP7]], align 4
+; CHECK-NEXT:    [[ARRAYIDX31:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i32 0, i32 [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP8]], <4 x i32> <i32 0, i32 4, i32 5, i32 6>
 ; CHECK-NEXT:    [[TMP11:%.*]] = fmul <4 x float> [[TMP8]], [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = bitcast float* [[ARRAYIDX5]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP11]], <4 x float>* [[TMP12]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP11]], ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 5
 ; CHECK-NEXT:    [[TMP13:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; CHECK-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds [32000 x float], [32000 x float]* @a, i32 0, i32 [[TMP13]]
-; CHECK-NEXT:    [[TMP14]] = load float, float* [[ARRAYIDX41]], align 4
+; CHECK-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i32 0, i32 [[TMP13]]
+; CHECK-NEXT:    [[TMP14]] = load float, ptr [[ARRAYIDX41]], align 4
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x float> [[TMP8]], i64 3
 ; CHECK-NEXT:    [[MUL45:%.*]] = fmul float [[TMP14]], [[TMP15]]
-; CHECK-NEXT:    store float [[MUL45]], float* [[ARRAYIDX31]], align 4
+; CHECK-NEXT:    store float [[MUL45]], ptr [[ARRAYIDX31]], align 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP16]], 31995
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_END:%.*]]
@@ -405,33 +377,31 @@ define void @good_load_order() {
 ; SSE2-NEXT:  entry:
 ; SSE2-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
 ; SSE2:       for.cond1.preheader:
-; SSE2-NEXT:    [[TMP0:%.*]] = load float, float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i32 0, i32 0), align 16
+; SSE2-NEXT:    [[TMP0:%.*]] = load float, ptr @a, align 16
 ; SSE2-NEXT:    br label [[FOR_BODY3:%.*]]
 ; SSE2:       for.body3:
 ; SSE2-NEXT:    [[TMP1:%.*]] = phi float [ [[TMP0]], [[FOR_COND1_PREHEADER]] ], [ [[TMP14:%.*]], [[FOR_BODY3]] ]
 ; SSE2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY3]] ]
 ; SSE2-NEXT:    [[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32
 ; SSE2-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
-; SSE2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32000 x float], [32000 x float]* @a, i32 0, i32 [[TMP3]]
+; SSE2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i32 0, i32 [[TMP3]]
 ; SSE2-NEXT:    [[TMP4:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; SSE2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [32000 x float], [32000 x float]* @a, i32 0, i32 [[TMP4]]
+; SSE2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i32 0, i32 [[TMP4]]
 ; SSE2-NEXT:    [[TMP5:%.*]] = trunc i64 [[INDVARS_IV]] to i32
 ; SSE2-NEXT:    [[TMP6:%.*]] = add i32 [[TMP5]], 4
-; SSE2-NEXT:    [[ARRAYIDX31:%.*]] = getelementptr inbounds [32000 x float], [32000 x float]* @a, i32 0, i32 [[TMP6]]
-; SSE2-NEXT:    [[TMP7:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>*
-; SSE2-NEXT:    [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[TMP7]], align 4
+; SSE2-NEXT:    [[ARRAYIDX31:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i32 0, i32 [[TMP6]]
+; SSE2-NEXT:    [[TMP8:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 4
 ; SSE2-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0
 ; SSE2-NEXT:    [[TMP10:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP8]], <4 x i32> <i32 0, i32 4, i32 5, i32 6>
 ; SSE2-NEXT:    [[TMP11:%.*]] = fmul <4 x float> [[TMP8]], [[TMP10]]
-; SSE2-NEXT:    [[TMP12:%.*]] = bitcast float* [[ARRAYIDX5]] to <4 x float>*
-; SSE2-NEXT:    store <4 x float> [[TMP11]], <4 x float>* [[TMP12]], align 4
+; SSE2-NEXT:    store <4 x float> [[TMP11]], ptr [[ARRAYIDX5]], align 4
 ; SSE2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 5
 ; SSE2-NEXT:    [[TMP13:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; SSE2-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds [32000 x float], [32000 x float]* @a, i32 0, i32 [[TMP13]]
-; SSE2-NEXT:    [[TMP14]] = load float, float* [[ARRAYIDX41]], align 4
+; SSE2-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i32 0, i32 [[TMP13]]
+; SSE2-NEXT:    [[TMP14]] = load float, ptr [[ARRAYIDX41]], align 4
 ; SSE2-NEXT:    [[TMP15:%.*]] = extractelement <4 x float> [[TMP8]], i64 3
 ; SSE2-NEXT:    [[MUL45:%.*]] = fmul float [[TMP14]], [[TMP15]]
-; SSE2-NEXT:    store float [[MUL45]], float* [[ARRAYIDX31]], align 4
+; SSE2-NEXT:    store float [[MUL45]], ptr [[ARRAYIDX31]], align 4
 ; SSE2-NEXT:    [[TMP16:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; SSE2-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP16]], 31995
 ; SSE2-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_END:%.*]]
@@ -442,38 +412,38 @@ entry:
   br label %for.cond1.preheader
 
 for.cond1.preheader:
-  %0 = load float, float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), align 16
+  %0 = load float, ptr @a, align 16
   br label %for.body3
 
 for.body3:
   %1 = phi float [ %0, %for.cond1.preheader ], [ %10, %for.body3 ]
   %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
   %2 = add nsw i64 %indvars.iv, 1
-  %arrayidx = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %2
-  %3 = load float, float* %arrayidx, align 4
-  %arrayidx5 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %indvars.iv
+  %arrayidx = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 %2
+  %3 = load float, ptr %arrayidx, align 4
+  %arrayidx5 = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 %indvars.iv
   %mul6 = fmul float %3, %1
-  store float %mul6, float* %arrayidx5, align 4
+  store float %mul6, ptr %arrayidx5, align 4
   %4 = add nsw i64 %indvars.iv, 2
-  %arrayidx11 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %4
-  %5 = load float, float* %arrayidx11, align 4
+  %arrayidx11 = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 %4
+  %5 = load float, ptr %arrayidx11, align 4
   %mul15 = fmul float %5, %3
-  store float %mul15, float* %arrayidx, align 4
+  store float %mul15, ptr %arrayidx, align 4
   %6 = add nsw i64 %indvars.iv, 3
-  %arrayidx21 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %6
-  %7 = load float, float* %arrayidx21, align 4
+  %arrayidx21 = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 %6
+  %7 = load float, ptr %arrayidx21, align 4
   %mul25 = fmul float %7, %5
-  store float %mul25, float* %arrayidx11, align 4
+  store float %mul25, ptr %arrayidx11, align 4
   %8 = add nsw i64 %indvars.iv, 4
-  %arrayidx31 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %8
-  %9 = load float, float* %arrayidx31, align 4
+  %arrayidx31 = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 %8
+  %9 = load float, ptr %arrayidx31, align 4
   %mul35 = fmul float %9, %7
-  store float %mul35, float* %arrayidx21, align 4
+  store float %mul35, ptr %arrayidx21, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
-  %arrayidx41 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %indvars.iv.next
-  %10 = load float, float* %arrayidx41, align 4
+  %arrayidx41 = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 %indvars.iv.next
+  %10 = load float, ptr %arrayidx41, align 4
   %mul45 = fmul float %10, %9
-  store float %mul45, float* %arrayidx31, align 4
+  store float %mul45, ptr %arrayidx31, align 4
   %11 = trunc i64 %indvars.iv.next to i32
   %cmp2 = icmp slt i32 %11, 31995
   br i1 %cmp2, label %for.body3, label %for.end
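The fold at work in the hunks above is that a constant zero-index GEP whose only job is to decay an array global to its first element becomes the bare global, because under opaque pointers the global already has type ptr. A minimal before/after sketch, using a hypothetical global @g rather than any test above:

  @g = global [4 x float] zeroinitializer

  ; typed pointers: decay [4 x float]* to float* with a constant GEP
  %x = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @g, i32 0, i32 0), align 4

  ; opaque pointers: the all-zero GEP folds away
  %x = load float, ptr @g, align 4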
@@ -486,38 +456,32 @@ for.end:
 ;  c[0] = a[0]+b[0];
 ;  c[1] = b[1]+a[1]; // swapped b[1] and a[1]
 
-define void @load_reorder_double(double* nocapture %c, double* noalias nocapture readonly %a, double* noalias nocapture readonly %b){
+define void @load_reorder_double(ptr nocapture %c, ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b){
 ; CHECK-LABEL: @load_reorder_double(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[C:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 4
+; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 ; SSE2-LABEL: @load_reorder_double(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4
-; SSE2-NEXT:    [[TMP3:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; SSE2-NEXT:    [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 4
+; SSE2-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[B:%.*]], align 4
+; SSE2-NEXT:    [[TMP4:%.*]] = load <2 x double>, ptr [[A:%.*]], align 4
 ; SSE2-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP2]], [[TMP4]]
-; SSE2-NEXT:    [[TMP6:%.*]] = bitcast double* [[C:%.*]] to <2 x double>*
-; SSE2-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 4
+; SSE2-NEXT:    store <2 x double> [[TMP5]], ptr [[C:%.*]], align 4
 ; SSE2-NEXT:    ret void
 ;
-  %1 = load double, double* %a
-  %2 = load double, double* %b
+  %1 = load double, ptr %a
+  %2 = load double, ptr %b
   %3 = fadd double %1, %2
-  store double %3, double* %c
-  %4 = getelementptr inbounds double, double* %b, i64 1
-  %5 = load double, double* %4
-  %6 = getelementptr inbounds double, double* %a, i64 1
-  %7 = load double, double* %6
+  store double %3, ptr %c
+  %4 = getelementptr inbounds double, ptr %b, i64 1
+  %5 = load double, ptr %4
+  %6 = getelementptr inbounds double, ptr %a, i64 1
+  %7 = load double, ptr %6
   %8 = fadd double %5, %7
-  %9 = getelementptr inbounds double, double* %c, i64 1
-  store double %8, double* %9
+  %9 = getelementptr inbounds double, ptr %c, i64 1
+  store double %8, ptr %9
   ret void
 }
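The other simplification running through these hunks is that vector loads and stores no longer need a pointer bitcast, since a ptr can be loaded or stored at any type. A small sketch of the pattern, with a hypothetical pointer %p:

  ; typed pointers: the vectorizer had to retype the pointer first
  %vp = bitcast double* %p to <2 x double>*
  %v  = load <2 x double>, <2 x double>* %vp, align 4

  ; opaque pointers: load the vector type directly through %p
  %v  = load <2 x double>, ptr %p, align 4

The same collapse applies to the stores, which is why each vectorized access above loses exactly one bitcast line.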
 
@@ -527,52 +491,46 @@ define void @load_reorder_double(double* nocapture %c, double* noalias nocapture
 ;  c[2] = a[2]+b[2];
 ;  c[3] = a[3]+b[3];
 
-define void @load_reorder_float(float* nocapture %c, float* noalias nocapture readonly %a, float* noalias nocapture readonly %b){
+define void @load_reorder_float(ptr nocapture %c, ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b){
 ; CHECK-LABEL: @load_reorder_float(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[A:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = fadd <4 x float> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[C:%.*]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP5]], <4 x float>* [[TMP6]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP5]], ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 ; SSE2-LABEL: @load_reorder_float(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast float* [[A:%.*]] to <4 x float>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; SSE2-NEXT:    [[TMP3:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
-; SSE2-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
+; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
+; SSE2-NEXT:    [[TMP4:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
 ; SSE2-NEXT:    [[TMP5:%.*]] = fadd <4 x float> [[TMP2]], [[TMP4]]
-; SSE2-NEXT:    [[TMP6:%.*]] = bitcast float* [[C:%.*]] to <4 x float>*
-; SSE2-NEXT:    store <4 x float> [[TMP5]], <4 x float>* [[TMP6]], align 4
+; SSE2-NEXT:    store <4 x float> [[TMP5]], ptr [[C:%.*]], align 4
 ; SSE2-NEXT:    ret void
 ;
-  %1 = load float, float* %a
-  %2 = load float, float* %b
+  %1 = load float, ptr %a
+  %2 = load float, ptr %b
   %3 = fadd float %1, %2
-  store float %3, float* %c
-  %4 = getelementptr inbounds float, float* %b, i64 1
-  %5 = load float, float* %4
-  %6 = getelementptr inbounds float, float* %a, i64 1
-  %7 = load float, float* %6
+  store float %3, ptr %c
+  %4 = getelementptr inbounds float, ptr %b, i64 1
+  %5 = load float, ptr %4
+  %6 = getelementptr inbounds float, ptr %a, i64 1
+  %7 = load float, ptr %6
   %8 = fadd float %5, %7
-  %9 = getelementptr inbounds float, float* %c, i64 1
-  store float %8, float* %9
-  %10 = getelementptr inbounds float, float* %a, i64 2
-  %11 = load float, float* %10
-  %12 = getelementptr inbounds float, float* %b, i64 2
-  %13 = load float, float* %12
+  %9 = getelementptr inbounds float, ptr %c, i64 1
+  store float %8, ptr %9
+  %10 = getelementptr inbounds float, ptr %a, i64 2
+  %11 = load float, ptr %10
+  %12 = getelementptr inbounds float, ptr %b, i64 2
+  %13 = load float, ptr %12
   %14 = fadd float %11, %13
-  %15 = getelementptr inbounds float, float* %c, i64 2
-  store float %14, float* %15
-  %16 = getelementptr inbounds float, float* %a, i64 3
-  %17 = load float, float* %16
-  %18 = getelementptr inbounds float, float* %b, i64 3
-  %19 = load float, float* %18
+  %15 = getelementptr inbounds float, ptr %c, i64 2
+  store float %14, ptr %15
+  %16 = getelementptr inbounds float, ptr %a, i64 3
+  %17 = load float, ptr %16
+  %18 = getelementptr inbounds float, ptr %b, i64 3
+  %19 = load float, ptr %18
   %20 = fadd float %17, %19
-  %21 = getelementptr inbounds float, float* %c, i64 3
-  store float %20, float* %21
+  %21 = getelementptr inbounds float, ptr %c, i64 3
+  store float %20, ptr %21
   ret void
 }
 
@@ -582,68 +540,60 @@ define void @load_reorder_float(float* nocapture %c, float* noalias nocapture re
 ; a[2] = (b[2]+c[2])+d[2];
 ; a[3] = (b[3]+c[3])+d[3];
 
-define void @opcode_reorder(float* noalias nocapture %a, float* noalias nocapture readonly %b, float* noalias nocapture readonly %c,float* noalias nocapture readonly %d) {
+define void @opcode_reorder(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c,ptr noalias nocapture readonly %d) {
 ; CHECK-LABEL: @opcode_reorder(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[C:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = fadd <4 x float> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[D:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x float>, ptr [[D:%.*]], align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], [[TMP5]]
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast float* [[A:%.*]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP8]], <4 x float>* [[TMP9]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP8]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 ; SSE2-LABEL: @opcode_reorder(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; SSE2-NEXT:    [[TMP3:%.*]] = bitcast float* [[C:%.*]] to <4 x float>*
-; SSE2-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
+; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
+; SSE2-NEXT:    [[TMP4:%.*]] = load <4 x float>, ptr [[C:%.*]], align 4
 ; SSE2-NEXT:    [[TMP5:%.*]] = fadd <4 x float> [[TMP2]], [[TMP4]]
-; SSE2-NEXT:    [[TMP6:%.*]] = bitcast float* [[D:%.*]] to <4 x float>*
-; SSE2-NEXT:    [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4
+; SSE2-NEXT:    [[TMP7:%.*]] = load <4 x float>, ptr [[D:%.*]], align 4
 ; SSE2-NEXT:    [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], [[TMP5]]
-; SSE2-NEXT:    [[TMP9:%.*]] = bitcast float* [[A:%.*]] to <4 x float>*
-; SSE2-NEXT:    store <4 x float> [[TMP8]], <4 x float>* [[TMP9]], align 4
+; SSE2-NEXT:    store <4 x float> [[TMP8]], ptr [[A:%.*]], align 4
 ; SSE2-NEXT:    ret void
 ;
-  %1 = load float, float* %b
-  %2 = load float, float* %c
+  %1 = load float, ptr %b
+  %2 = load float, ptr %c
   %3 = fadd float %1, %2
-  %4 = load float, float* %d
+  %4 = load float, ptr %d
   %5 = fadd float %3, %4
-  store float %5, float* %a
-  %6 = getelementptr inbounds float, float* %d, i64 1
-  %7 = load float, float* %6
-  %8 = getelementptr inbounds float, float* %b, i64 1
-  %9 = load float, float* %8
-  %10 = getelementptr inbounds float, float* %c, i64 1
-  %11 = load float, float* %10
+  store float %5, ptr %a
+  %6 = getelementptr inbounds float, ptr %d, i64 1
+  %7 = load float, ptr %6
+  %8 = getelementptr inbounds float, ptr %b, i64 1
+  %9 = load float, ptr %8
+  %10 = getelementptr inbounds float, ptr %c, i64 1
+  %11 = load float, ptr %10
   %12 = fadd float %9, %11
   %13 = fadd float %7, %12
-  %14 = getelementptr inbounds float, float* %a, i64 1
-  store float %13, float* %14
-  %15 = getelementptr inbounds float, float* %b, i64 2
-  %16 = load float, float* %15
-  %17 = getelementptr inbounds float, float* %c, i64 2
-  %18 = load float, float* %17
+  %14 = getelementptr inbounds float, ptr %a, i64 1
+  store float %13, ptr %14
+  %15 = getelementptr inbounds float, ptr %b, i64 2
+  %16 = load float, ptr %15
+  %17 = getelementptr inbounds float, ptr %c, i64 2
+  %18 = load float, ptr %17
   %19 = fadd float %16, %18
-  %20 = getelementptr inbounds float, float* %d, i64 2
-  %21 = load float, float* %20
+  %20 = getelementptr inbounds float, ptr %d, i64 2
+  %21 = load float, ptr %20
   %22 = fadd float %19, %21
-  %23 = getelementptr inbounds float, float* %a, i64 2
-  store float %22, float* %23
-  %24 = getelementptr inbounds float, float* %b, i64 3
-  %25 = load float, float* %24
-  %26 = getelementptr inbounds float, float* %c, i64 3
-  %27 = load float, float* %26
+  %23 = getelementptr inbounds float, ptr %a, i64 2
+  store float %22, ptr %23
+  %24 = getelementptr inbounds float, ptr %b, i64 3
+  %25 = load float, ptr %24
+  %26 = getelementptr inbounds float, ptr %c, i64 3
+  %27 = load float, ptr %26
   %28 = fadd float %25, %27
-  %29 = getelementptr inbounds float, float* %d, i64 3
-  %30 = load float, float* %29
+  %29 = getelementptr inbounds float, ptr %d, i64 3
+  %30 = load float, ptr %29
   %31 = fadd float %28, %30
-  %32 = getelementptr inbounds float, float* %a, i64 3
-  store float %31, float* %32
+  %32 = getelementptr inbounds float, ptr %a, i64 3
+  store float %31, ptr %32
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/opt.ll b/llvm/test/Transforms/SLPVectorizer/X86/opt.ll

index d8990b3eec61b..85447902618c8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/opt.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/opt.ll
@@ -7,44 +7,41 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 ; Make sure we can disable slp vectorization in opt.
 
-define void @test1(double* %a, double* %b, double* %c) {
+define void @test1(ptr %a, ptr %b, ptr %c) {
 ; SLP-LABEL: @test1(
 ; SLP-NEXT:  entry:
-; SLP-NEXT:    [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; SLP-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; SLP-NEXT:    [[TMP2:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
-; SLP-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; SLP-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
+; SLP-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[B:%.*]], align 8
 ; SLP-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
-; SLP-NEXT:    [[TMP5:%.*]] = bitcast double* [[C:%.*]] to <2 x double>*
-; SLP-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
+; SLP-NEXT:    store <2 x double> [[TMP4]], ptr [[C:%.*]], align 8
 ; SLP-NEXT:    ret void
 ;
 ; NOSLP-LABEL: @test1(
 ; NOSLP-NEXT:  entry:
-; NOSLP-NEXT:    [[I0:%.*]] = load double, double* [[A:%.*]], align 8
-; NOSLP-NEXT:    [[I1:%.*]] = load double, double* [[B:%.*]], align 8
+; NOSLP-NEXT:    [[I0:%.*]] = load double, ptr [[A:%.*]], align 8
+; NOSLP-NEXT:    [[I1:%.*]] = load double, ptr [[B:%.*]], align 8
 ; NOSLP-NEXT:    [[MUL:%.*]] = fmul double [[I0]], [[I1]]
-; NOSLP-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
-; NOSLP-NEXT:    [[I3:%.*]] = load double, double* [[ARRAYIDX3]], align 8
-; NOSLP-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
-; NOSLP-NEXT:    [[I4:%.*]] = load double, double* [[ARRAYIDX4]], align 8
+; NOSLP-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[A]], i64 1
+; NOSLP-NEXT:    [[I3:%.*]] = load double, ptr [[ARRAYIDX3]], align 8
+; NOSLP-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds double, ptr [[B]], i64 1
+; NOSLP-NEXT:    [[I4:%.*]] = load double, ptr [[ARRAYIDX4]], align 8
 ; NOSLP-NEXT:    [[MUL5:%.*]] = fmul double [[I3]], [[I4]]
-; NOSLP-NEXT:    store double [[MUL]], double* [[C:%.*]], align 8
-; NOSLP-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[C]], i64 1
-; NOSLP-NEXT:    store double [[MUL5]], double* [[ARRAYIDX5]], align 8
+; NOSLP-NEXT:    store double [[MUL]], ptr [[C:%.*]], align 8
+; NOSLP-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[C]], i64 1
+; NOSLP-NEXT:    store double [[MUL5]], ptr [[ARRAYIDX5]], align 8
 ; NOSLP-NEXT:    ret void
 ;
 entry:
-  %i0 = load double, double* %a, align 8
-  %i1 = load double, double* %b, align 8
+  %i0 = load double, ptr %a, align 8
+  %i1 = load double, ptr %b, align 8
   %mul = fmul double %i0, %i1
-  %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double, double* %arrayidx3, align 8
-  %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double, double* %arrayidx4, align 8
+  %arrayidx3 = getelementptr inbounds double, ptr %a, i64 1
+  %i3 = load double, ptr %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double, ptr %b, i64 1
+  %i4 = load double, ptr %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
-  store double %mul, double* %c, align 8
-  %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
-  store double %mul5, double* %arrayidx5, align 8
+  store double %mul, ptr %c, align 8
+  %arrayidx5 = getelementptr inbounds double, ptr %c, i64 1
+  store double %mul5, ptr %arrayidx5, align 8
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll
index 604b833197893..efb11d2756c3c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll
@@ -10,19 +10,19 @@
 define void @f(i1 %x) #0 {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* bitcast (%struct.a* @a to <2 x i64>*), align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr @a, align 8
 ; CHECK-NEXT:    br i1 [[X:%.*]], label [[WHILE_BODY_LR_PH:%.*]], label [[WHILE_END:%.*]]
 ; CHECK:       while.body.lr.ph:
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1
 ; CHECK-NEXT:    [[ICMP_A1:%.*]] = icmp eq i64 [[TMP1]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (%struct.a* @b to <2 x i64>*), align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @b, align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i1> poison, i1 [[ICMP_A1]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i1> [[TMP3]], <2 x i1> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = select <2 x i1> [[SHUFFLE]], <2 x i64> [[TMP2]], <2 x i64> [[TMP0]]
 ; CHECK-NEXT:    br label [[WHILE_END]]
 ; CHECK:       while.end:
 ; CHECK-NEXT:    [[TMP5:%.*]] = phi <2 x i64> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP4]], [[WHILE_BODY_LR_PH]] ]
-; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* bitcast (%struct.a* @c to <2 x i64>*), align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x i64>, ptr @c, align 8
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
 ; CHECK-NEXT:    [[ICMP_D0:%.*]] = icmp eq i64 [[TMP7]], 0
 ; CHECK-NEXT:    br i1 [[ICMP_D0]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
@@ -31,20 +31,20 @@ define void @f(i1 %x) #0 {
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[AND0_TMP]], i32 0
 ; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP5]], <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    [[TMP10:%.*]] = and <2 x i64> [[TMP9]], [[TMP6]]
-; CHECK-NEXT:    store <2 x i64> [[TMP10]], <2 x i64>* bitcast (%struct.a* @a to <2 x i64>*), align 8
+; CHECK-NEXT:    store <2 x i64> [[TMP10]], ptr @a, align 8
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %a0 = load i64, i64* getelementptr inbounds (%struct.a, %struct.a* @a, i32 0, i32 0, i32 0), align 8
-  %a1 = load i64, i64* getelementptr inbounds (%struct.a, %struct.a* @a, i32 0, i32 0, i32 1), align 8
+  %a0 = load i64, ptr @a, align 8
+  %a1 = load i64, ptr getelementptr inbounds (%struct.a, ptr @a, i32 0, i32 0, i32 1), align 8
   br i1 %x, label %while.body.lr.ph, label %while.end
 
 while.body.lr.ph:
   %icmp.a1 = icmp eq i64 %a1, 0
-  %b0 = load i64, i64* getelementptr inbounds (%struct.a, %struct.a* @b, i32 0, i32 0, i32 0), align 8
-  %b1 = load i64, i64* getelementptr inbounds (%struct.a, %struct.a* @b, i32 0, i32 0, i32 1), align 8
+  %b0 = load i64, ptr @b, align 8
+  %b1 = load i64, ptr getelementptr inbounds (%struct.a, ptr @b, i32 0, i32 0, i32 1), align 8
   %c0 = select i1 %icmp.a1, i64 %b0, i64 %a0
   %c1 = select i1 %icmp.a1, i64 %b1, i64 %a1
   br label %while.end
@@ -52,8 +52,8 @@ while.body.lr.ph:
 while.end:
   %d0 = phi i64 [ %a0, %entry ], [ %c0, %while.body.lr.ph ]
   %d1 = phi i64 [ %a1, %entry ], [ %c1, %while.body.lr.ph ]
-  %e0 = load i64, i64* getelementptr inbounds (%struct.a, %struct.a* @c, i32 0, i32 0, i32 0), align 8
-  %e1 = load i64, i64* getelementptr inbounds (%struct.a, %struct.a* @c, i32 0, i32 0, i32 1), align 8
+  %e0 = load i64, ptr @c, align 8
+  %e1 = load i64, ptr getelementptr inbounds (%struct.a, ptr @c, i32 0, i32 0, i32 1), align 8
   %icmp.d0 = icmp eq i64 %d0, 0
   br i1 %icmp.d0, label %if.end, label %if.then
 
@@ -61,8 +61,8 @@ if.then:
   %and0.tmp = and i64 %d0, 8
   %and0 = and i64 %and0.tmp, %e0
   %and1 = and i64 %e1, %d1
-  store i64 %and0, i64* getelementptr inbounds (%struct.a, %struct.a* @a, i32 0, i32 0, i32 0), align 8
-  store i64 %and1, i64* getelementptr inbounds (%struct.a, %struct.a* @a, i32 0, i32 0, i32 1), align 8
+  store i64 %and0, ptr @a, align 8
+  store i64 %and1, ptr getelementptr inbounds (%struct.a, ptr @a, i32 0, i32 0, i32 1), align 8
   br label %if.end
 
 if.end:
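Worth noting in the ordering-bug.ll hunks above: only the field-0 access folds to the bare global, while the access to the second member keeps an explicit constant GEP, now spelled with ptr as the base. Sketched with a hypothetical two-field struct:

  %pair = type { i64, i64 }
  @p = global %pair zeroinitializer

  ; field 0: the all-zero constant GEP disappears
  %lo = load i64, ptr @p, align 8
  ; field 1: the offset is still needed; only the pointer operand type changes
  %hi = load i64, ptr getelementptr inbounds (%pair, ptr @p, i32 0, i32 1), align 8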

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/ordering.ll b/llvm/test/Transforms/SLPVectorizer/X86/ordering.ll
index fb2b00612fa83..ad3ebf57ab7a4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/ordering.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/ordering.ll
@@ -10,7 +10,7 @@ define void @updateModelQPFrame(i32 %m_Bits) {
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load double, double* undef, align 8
+  %0 = load double, ptr undef, align 8
   %mul = fmul double undef, %0
   %mul2 = fmul double undef, %mul
   %mul4 = fmul double %0, %mul2
@@ -23,33 +23,33 @@ entry:
   ret void
 }
 
-declare i8* @objc_msgSend(i8*, i8*, ...)
+declare ptr @objc_msgSend(ptr, ptr, ...)
 declare i32 @personality_v0(...)
 
-define void @invoketest() personality i8* bitcast (i32 (...)* @personality_v0 to i8*) {
+define void @invoketest() personality ptr @personality_v0 {
 ; CHECK-LABEL: @invoketest(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 undef, label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
 ; CHECK:       cond.true:
-; CHECK-NEXT:    [[CALL49:%.*]] = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef)
+; CHECK-NEXT:    [[CALL49:%.*]] = invoke double @objc_msgSend(ptr undef, ptr undef)
 ; CHECK-NEXT:    to label [[COND_TRUE54:%.*]] unwind label [[LPAD:%.*]]
 ; CHECK:       cond.false:
-; CHECK-NEXT:    [[CALL51:%.*]] = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef)
+; CHECK-NEXT:    [[CALL51:%.*]] = invoke double @objc_msgSend(ptr undef, ptr undef)
 ; CHECK-NEXT:    to label [[COND_FALSE57:%.*]] unwind label [[LPAD]]
 ; CHECK:       cond.true54:
-; CHECK-NEXT:    [[CALL56:%.*]] = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef)
+; CHECK-NEXT:    [[CALL56:%.*]] = invoke double @objc_msgSend(ptr undef, ptr undef)
 ; CHECK-NEXT:    to label [[COND_END60:%.*]] unwind label [[LPAD]]
 ; CHECK:       cond.false57:
-; CHECK-NEXT:    [[CALL59:%.*]] = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef)
+; CHECK-NEXT:    [[CALL59:%.*]] = invoke double @objc_msgSend(ptr undef, ptr undef)
 ; CHECK-NEXT:    to label [[COND_END60]] unwind label [[LPAD]]
 ; CHECK:       cond.end60:
 ; CHECK-NEXT:    br i1 undef, label [[IF_END98:%.*]], label [[IF_THEN63:%.*]]
 ; CHECK:       if.then63:
 ; CHECK-NEXT:    br label [[IF_END98]]
 ; CHECK:       lpad:
-; CHECK-NEXT:    [[L:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    [[L:%.*]] = landingpad { ptr, i32 }
 ; CHECK-NEXT:    cleanup
-; CHECK-NEXT:    resume { i8*, i32 } [[L]]
+; CHECK-NEXT:    resume { ptr, i32 } [[L]]
 ; CHECK:       if.end98:
 ; CHECK-NEXT:    br label [[IF_END99:%.*]]
 ; CHECK:       if.end99:
@@ -59,19 +59,19 @@ entry:
   br i1 undef, label %cond.true, label %cond.false
 
 cond.true:
-  %call49 = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef)
+  %call49 = invoke double @objc_msgSend(ptr undef, ptr undef)
   to label %cond.true54 unwind label %lpad
 
 cond.false:
-  %call51 = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef)
+  %call51 = invoke double @objc_msgSend(ptr undef, ptr undef)
   to label %cond.false57 unwind label %lpad
 
 cond.true54:
-  %call56 = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef)
+  %call56 = invoke double @objc_msgSend(ptr undef, ptr undef)
   to label %cond.end60 unwind label %lpad
 
 cond.false57:
-  %call59 = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef)
+  %call59 = invoke double @objc_msgSend(ptr undef, ptr undef)
   to label %cond.end60 unwind label %lpad
 
 cond.end60:
@@ -89,9 +89,9 @@ if.then63:
   br label %if.end98
 
 lpad:
-  %l = landingpad { i8*, i32 }
+  %l = landingpad { ptr, i32 }
   cleanup
-  resume { i8*, i32 } %l
+  resume { ptr, i32 } %l
 
 if.end98:
   %dimensionsResult.sroa.0.0 = phi double [ %div71, %if.then63 ], [ %cond126, %cond.end60 ]
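The invoke rewrites in ordering.ll rely on the fact that a call site carries its own function type, so the constant bitcast that retyped the varargs callee is neither needed nor representable with opaque pointers. A sketch reusing the @objc_msgSend declaration from this test; %obj, %sel, and the labels are hypothetical:

  ; typed pointers (before):
  ;   %r = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend
  ;                               to double (i8*, i8*)*)(i8* %obj, i8* %sel)
  ; opaque pointers (after): the instruction states the callee type itself
  %r = invoke double @objc_msgSend(ptr %obj, ptr %sel)
          to label %ok unwind label %lpad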

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll
index a7b494e007cc4..dc61cd551b6f7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll
@@ -25,16 +25,16 @@ define void @get_block(i32 %y_pos) local_unnamed_addr #0 {
 ; CHECK-NEXT:    [[TMP8:%.*]] = trunc <4 x i64> [[TMP7]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 0
 ; CHECK-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
-; CHECK-NEXT:    [[ARRAYIDX31:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP10]]
+; CHECK-NEXT:    [[ARRAYIDX31:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[TMP10]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x i32> [[TMP8]], i32 1
 ; CHECK-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
-; CHECK-NEXT:    [[ARRAYIDX31_1:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP12]]
+; CHECK-NEXT:    [[ARRAYIDX31_1:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[TMP12]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x i32> [[TMP8]], i32 2
 ; CHECK-NEXT:    [[TMP14:%.*]] = sext i32 [[TMP13]] to i64
-; CHECK-NEXT:    [[ARRAYIDX31_2:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP14]]
+; CHECK-NEXT:    [[ARRAYIDX31_2:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[TMP14]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3
 ; CHECK-NEXT:    [[TMP16:%.*]] = sext i32 [[TMP15]] to i64
-; CHECK-NEXT:    [[ARRAYIDX31_3:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP16]]
+; CHECK-NEXT:    [[ARRAYIDX31_3:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[TMP16]]
 ; CHECK-NEXT:    unreachable
 ;
 entry:
@@ -54,24 +54,24 @@ if.end:                                           ; preds = %land.lhs.true
   %cmp.i4.i = icmp slt i32 %cond.i.i, undef
   %cond.i5.i = select i1 %cmp.i4.i, i32 %cond.i.i, i32 undef
   %idxprom30 = sext i32 %cond.i5.i to i64
-  %arrayidx31 = getelementptr inbounds i16*, i16** undef, i64 %idxprom30
+  %arrayidx31 = getelementptr inbounds ptr, ptr undef, i64 %idxprom30
   %cmp.i.i.1 = icmp sgt i32 %sub14, -1
   %cond.i.i.1 = select i1 %cmp.i.i.1, i32 undef, i32 0
   %cmp.i4.i.1 = icmp slt i32 %cond.i.i.1, undef
   %cond.i5.i.1 = select i1 %cmp.i4.i.1, i32 %cond.i.i.1, i32 undef
   %idxprom30.1 = sext i32 %cond.i5.i.1 to i64
-  %arrayidx31.1 = getelementptr inbounds i16*, i16** undef, i64 %idxprom30.1
+  %arrayidx31.1 = getelementptr inbounds ptr, ptr undef, i64 %idxprom30.1
   %cmp.i.i.2 = icmp sgt i32 %sub14, -5
   %cond.i.i.2 = select i1 %cmp.i.i.2, i32 undef, i32 0
   %cmp.i4.i.2 = icmp slt i32 %cond.i.i.2, undef
   %cond.i5.i.2 = select i1 %cmp.i4.i.2, i32 %cond.i.i.2, i32 undef
   %idxprom30.2 = sext i32 %cond.i5.i.2 to i64
-  %arrayidx31.2 = getelementptr inbounds i16*, i16** undef, i64 %idxprom30.2
+  %arrayidx31.2 = getelementptr inbounds ptr, ptr undef, i64 %idxprom30.2
   %cmp.i.i.3 = icmp sgt i32 %sub14, -9
   %cond.i.i.3 = select i1 %cmp.i.i.3, i32 undef, i32 0
   %cmp.i4.i.3 = icmp slt i32 %cond.i.i.3, undef
   %cond.i5.i.3 = select i1 %cmp.i4.i.3, i32 %cond.i.i.3, i32 undef
   %idxprom30.3 = sext i32 %cond.i5.i.3 to i64
-  %arrayidx31.3 = getelementptr inbounds i16*, i16** undef, i64 %idxprom30.3
+  %arrayidx31.3 = getelementptr inbounds ptr, ptr undef, i64 %idxprom30.3
   unreachable
 }
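In the partail.ll hunks the nesting of pointer types disappears: i16** and its i16* element type both print as plain ptr, and only the GEP's source element type still records the pointer-sized stride. Sketch with a hypothetical %table and %idx:

  ; typed pointers: base i16**, element type i16*
  %slot = getelementptr inbounds i16*, i16** %table, i64 %idx

  ; opaque pointers: both collapse to ptr; the stride stays pointer-sized
  %slot = getelementptr inbounds ptr, ptr %table, i64 %idx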

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll
index 7aae96a8bc2fa..4f2c95645a753 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
 target triple = "i386-apple-macosx10.9.0"
 
-;int foo(double *A, int k) {
+;int foo(ptr A, int k) {
 ;  double A0;
 ;  double A1;
 ;  if (k) {
@@ -19,20 +19,18 @@ target triple = "i386-apple-macosx10.9.0"
 ;}
 
 
-define i32 @foo(double* nocapture %A, i32 %k) {
+define i32 @foo(ptr nocapture %A, i32 %k) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[K:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_ELSE:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.else:
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 10
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 10
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
 ; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x double> [ [[TMP1]], [[IF_ELSE]] ], [ <double 3.000000e+00, double 5.000000e+00>, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[A]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP2]], <2 x double>* [[TMP3]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr [[A]], align 8
 ; CHECK-NEXT:    ret i32 undef
 ;
 entry:
@@ -40,23 +38,23 @@ entry:
   br i1 %tobool, label %if.else, label %if.end
 
 if.else:                                          ; preds = %entry
-  %arrayidx = getelementptr inbounds double, double* %A, i64 10
-  %0 = load double, double* %arrayidx, align 8
-  %arrayidx1 = getelementptr inbounds double, double* %A, i64 11
-  %1 = load double, double* %arrayidx1, align 8
+  %arrayidx = getelementptr inbounds double, ptr %A, i64 10
+  %0 = load double, ptr %arrayidx, align 8
+  %arrayidx1 = getelementptr inbounds double, ptr %A, i64 11
+  %1 = load double, ptr %arrayidx1, align 8
   br label %if.end
 
 if.end:                                           ; preds = %entry, %if.else
   %A0.0 = phi double [ %0, %if.else ], [ 3.000000e+00, %entry ]
   %A1.0 = phi double [ %1, %if.else ], [ 5.000000e+00, %entry ]
-  store double %A0.0, double* %A, align 8
-  %arrayidx3 = getelementptr inbounds double, double* %A, i64 1
-  store double %A1.0, double* %arrayidx3, align 8
+  store double %A0.0, ptr %A, align 8
+  %arrayidx3 = getelementptr inbounds double, ptr %A, i64 1
+  store double %A1.0, ptr %arrayidx3, align 8
   ret i32 undef
 }
 
 
-;int foo(double * restrict B,  double * restrict A, int n, int m) {
+;int foo(ptr restrict B,  ptr restrict A, int n, int m) {
 ;  double R=A[1];
 ;  double G=A[0];
 ;  for (int i=0; i < 100; i++) {
@@ -72,11 +70,10 @@ if.end:                                           ; preds = %entry, %if.else
 ;  return 0;
 ;}
 
-define i32 @foo2(double* noalias nocapture %B, double* noalias nocapture %A, i32 %n, i32 %m) #0 {
+define i32 @foo2(ptr noalias nocapture %B, ptr noalias nocapture %A, i32 %n, i32 %m) #0 {
 ; CHECK-LABEL: @foo2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_019:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
@@ -88,14 +85,13 @@ define i32 @foo2(double* noalias nocapture %B, double* noalias nocapture %A, i32
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 100
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:
-  %arrayidx = getelementptr inbounds double, double* %A, i64 1
-  %0 = load double, double* %arrayidx, align 8
-  %1 = load double, double* %A, align 8
+  %arrayidx = getelementptr inbounds double, ptr %A, i64 1
+  %0 = load double, ptr %arrayidx, align 8
+  %1 = load double, ptr %A, align 8
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
@@ -113,13 +109,13 @@ for.body:                                         ; preds = %for.body, %entry
   br i1 %exitcond, label %for.end, label %for.body
 
 for.end:                                          ; preds = %for.body
-  store double %add5, double* %B, align 8
-  %arrayidx7 = getelementptr inbounds double, double* %B, i64 1
-  store double %add4, double* %arrayidx7, align 8
+  store double %add5, ptr %B, align 8
+  %arrayidx7 = getelementptr inbounds double, ptr %B, i64 1
+  store double %add4, ptr %arrayidx7, align 8
   ret i32 0
 }
 
-; float foo3(float *A) {
+; float foo3(ptr A) {
 ;
 ;   float R = A[0];
 ;   float G = A[1];
@@ -127,23 +123,22 @@ for.end:                                          ; preds = %for.body
 ;   float Y = A[3];
 ;   float P = A[4];
 ;   for (int i=0; i < 121; i+=3) {
-;     R+=A[i+0]*7;
-;     G+=A[i+1]*8;
-;     B+=A[i+2]*9;
-;     Y+=A[i+3]*10;
-;     P+=A[i+4]*11;
+;     R+=Aptr7;
+;     G+=Aptr8;
+;     B+=Aptr9;
+;     Y+=Aptr10;
+;     P+=Aptr11;
 ;   }
 ;
 ;   return R+G+B+Y+P;
 ; }
 
-define float @foo3(float* nocapture readonly %A) #0 {
+define float @foo3(ptr nocapture readonly %A) #0 {
 ; CHECK-LABEL: @foo3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[A:%.*]], align 4
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, float* [[A]], i64 1
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[ARRAYIDX1]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[ARRAYIDX1]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[TMP3]], i32 1
@@ -157,12 +152,11 @@ define float @foo3(float* nocapture readonly %A) #0 {
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[TMP8]], 7.000000e+00
 ; CHECK-NEXT:    [[ADD6]] = fadd float [[R_052]], [[MUL]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = add nsw i64 [[INDVARS_IV]], 2
-; CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP9]]
-; CHECK-NEXT:    [[TMP10:%.*]] = load float, float* [[ARRAYIDX14]], align 4
+; CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load float, ptr [[ARRAYIDX14]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 3
-; CHECK-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT]]
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast float* [[ARRAYIDX19]] to <2 x float>*
-; CHECK-NEXT:    [[TMP12]] = load <2 x float>, <2 x float>* [[TMP11]], align 4
+; CHECK-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[TMP12]] = load <2 x float>, ptr [[ARRAYIDX19]], align 4
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> [[TMP12]], <4 x i32> <i32 1, i32 undef, i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP10]], i32 1
 ; CHECK-NEXT:    [[TMP15:%.*]] = fmul <4 x float> [[TMP14]], <float 8.000000e+00, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01>
@@ -182,15 +176,15 @@ define float @foo3(float* nocapture readonly %A) #0 {
 ; CHECK-NEXT:    ret float [[ADD31]]
 ;
 entry:
-  %0 = load float, float* %A, align 4
-  %arrayidx1 = getelementptr inbounds float, float* %A, i64 1
-  %1 = load float, float* %arrayidx1, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %A, i64 2
-  %2 = load float, float* %arrayidx2, align 4
-  %arrayidx3 = getelementptr inbounds float, float* %A, i64 3
-  %3 = load float, float* %arrayidx3, align 4
-  %arrayidx4 = getelementptr inbounds float, float* %A, i64 4
-  %4 = load float, float* %arrayidx4, align 4
+  %0 = load float, ptr %A, align 4
+  %arrayidx1 = getelementptr inbounds float, ptr %A, i64 1
+  %1 = load float, ptr %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %A, i64 2
+  %2 = load float, ptr %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds float, ptr %A, i64 3
+  %3 = load float, ptr %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds float, ptr %A, i64 4
+  %4 = load float, ptr %arrayidx4, align 4
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
@@ -207,18 +201,18 @@ for.body:                                         ; preds = %for.body, %entry
   %mul10 = fmul float %5, 8.000000e+00
   %add11 = fadd float %G.053, %mul10
   %7 = add nsw i64 %indvars.iv, 2
-  %arrayidx14 = getelementptr inbounds float, float* %A, i64 %7
-  %8 = load float, float* %arrayidx14, align 4
+  %arrayidx14 = getelementptr inbounds float, ptr %A, i64 %7
+  %8 = load float, ptr %arrayidx14, align 4
   %mul15 = fmul float %8, 9.000000e+00
   %add16 = fadd float %B.054, %mul15
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 3
-  %arrayidx19 = getelementptr inbounds float, float* %A, i64 %indvars.iv.next
-  %9 = load float, float* %arrayidx19, align 4
+  %arrayidx19 = getelementptr inbounds float, ptr %A, i64 %indvars.iv.next
+  %9 = load float, ptr %arrayidx19, align 4
   %mul20 = fmul float %9, 1.000000e+01
   %add21 = fadd float %Y.055, %mul20
   %10 = add nsw i64 %indvars.iv, 4
-  %arrayidx24 = getelementptr inbounds float, float* %A, i64 %10
-  %11 = load float, float* %arrayidx24, align 4
+  %arrayidx24 = getelementptr inbounds float, ptr %A, i64 %10
+  %11 = load float, ptr %arrayidx24, align 4
   %mul25 = fmul float %11, 1.100000e+01
   %add26 = fadd float %P.056, %mul25
   %12 = trunc i64 %indvars.iv.next to i32
@@ -235,7 +229,7 @@ for.end:                                          ; preds = %for.body
 
 ; Make sure the order of phi nodes of different types does not prevent
 ; vectorization of same typed phi nodes.
-define float @sort_phi_type(float* nocapture readonly %A) {
+define float @sort_phi_type(ptr nocapture readonly %A) {
 ; CHECK-LABEL: @sort_phi_type(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
@@ -282,25 +276,24 @@ for.end:                                          ; preds = %for.body
   ret float %add31
 }
 
-define void @test(x86_fp80* %i1, x86_fp80* %i2, x86_fp80* %o) {
+define void @test(ptr %i1, ptr %i2, ptr %o) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[I1_0:%.*]] = load x86_fp80, x86_fp80* [[I1:%.*]], align 16
-; CHECK-NEXT:    [[I1_GEP1:%.*]] = getelementptr x86_fp80, x86_fp80* [[I1]], i64 1
-; CHECK-NEXT:    [[I1_1:%.*]] = load x86_fp80, x86_fp80* [[I1_GEP1]], align 16
+; CHECK-NEXT:    [[I1_0:%.*]] = load x86_fp80, ptr [[I1:%.*]], align 16
+; CHECK-NEXT:    [[I1_GEP1:%.*]] = getelementptr x86_fp80, ptr [[I1]], i64 1
+; CHECK-NEXT:    [[I1_1:%.*]] = load x86_fp80, ptr [[I1_GEP1]], align 16
 ; CHECK-NEXT:    br i1 undef, label [[THEN:%.*]], label [[END:%.*]]
 ; CHECK:       then:
-; CHECK-NEXT:    [[I2_GEP0:%.*]] = getelementptr inbounds x86_fp80, x86_fp80* [[I2:%.*]], i64 0
-; CHECK-NEXT:    [[I2_0:%.*]] = load x86_fp80, x86_fp80* [[I2_GEP0]], align 16
-; CHECK-NEXT:    [[I2_GEP1:%.*]] = getelementptr inbounds x86_fp80, x86_fp80* [[I2]], i64 1
-; CHECK-NEXT:    [[I2_1:%.*]] = load x86_fp80, x86_fp80* [[I2_GEP1]], align 16
+; CHECK-NEXT:    [[I2_0:%.*]] = load x86_fp80, ptr [[I2:%.*]], align 16
+; CHECK-NEXT:    [[I2_GEP1:%.*]] = getelementptr inbounds x86_fp80, ptr [[I2]], i64 1
+; CHECK-NEXT:    [[I2_1:%.*]] = load x86_fp80, ptr [[I2_GEP1]], align 16
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
 ; CHECK-NEXT:    [[PHI0:%.*]] = phi x86_fp80 [ [[I1_0]], [[ENTRY:%.*]] ], [ [[I2_0]], [[THEN]] ]
 ; CHECK-NEXT:    [[PHI1:%.*]] = phi x86_fp80 [ [[I1_1]], [[ENTRY]] ], [ [[I2_1]], [[THEN]] ]
-; CHECK-NEXT:    store x86_fp80 [[PHI0]], x86_fp80* [[O:%.*]], align 16
-; CHECK-NEXT:    [[O_GEP1:%.*]] = getelementptr inbounds x86_fp80, x86_fp80* [[O]], i64 1
-; CHECK-NEXT:    store x86_fp80 [[PHI1]], x86_fp80* [[O_GEP1]], align 16
+; CHECK-NEXT:    store x86_fp80 [[PHI0]], ptr [[O:%.*]], align 16
+; CHECK-NEXT:    [[O_GEP1:%.*]] = getelementptr inbounds x86_fp80, ptr [[O]], i64 1
+; CHECK-NEXT:    store x86_fp80 [[PHI1]], ptr [[O_GEP1]], align 16
 ; CHECK-NEXT:    ret void
 ;
 ; Test that we correctly recognize the discontiguous memory in arrays where the
@@ -308,23 +301,22 @@ define void @test(x86_fp80* %i1, x86_fp80* %i2, x86_fp80* %o) {
 ; We disable the vectorization of x86_fp80 for now.
 
 entry:
-  %i1.0 = load x86_fp80, x86_fp80* %i1, align 16
-  %i1.gep1 = getelementptr x86_fp80, x86_fp80* %i1, i64 1
-  %i1.1 = load x86_fp80, x86_fp80* %i1.gep1, align 16
+  %i1.0 = load x86_fp80, ptr %i1, align 16
+  %i1.gep1 = getelementptr x86_fp80, ptr %i1, i64 1
+  %i1.1 = load x86_fp80, ptr %i1.gep1, align 16
   br i1 undef, label %then, label %end
 
 then:
-  %i2.gep0 = getelementptr inbounds x86_fp80, x86_fp80* %i2, i64 0
-  %i2.0 = load x86_fp80, x86_fp80* %i2.gep0, align 16
-  %i2.gep1 = getelementptr inbounds x86_fp80, x86_fp80* %i2, i64 1
-  %i2.1 = load x86_fp80, x86_fp80* %i2.gep1, align 16
+  %i2.0 = load x86_fp80, ptr %i2, align 16
+  %i2.gep1 = getelementptr inbounds x86_fp80, ptr %i2, i64 1
+  %i2.1 = load x86_fp80, ptr %i2.gep1, align 16
   br label %end
 
 end:
   %phi0 = phi x86_fp80 [ %i1.0, %entry ], [ %i2.0, %then ]
   %phi1 = phi x86_fp80 [ %i1.1, %entry ], [ %i2.1, %then ]
-  store x86_fp80 %phi0, x86_fp80* %o, align 16
-  %o.gep1 = getelementptr inbounds x86_fp80, x86_fp80* %o, i64 1
-  store x86_fp80 %phi1, x86_fp80* %o.gep1, align 16
+  store x86_fp80 %phi0, ptr %o, align 16
+  %o.gep1 = getelementptr inbounds x86_fp80, ptr %o, i64 1
+  store x86_fp80 %phi1, ptr %o.gep1, align 16
   ret void
 }
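One more fold is visible in @test above: an instruction GEP with a single zero index produces the same pointer value as its operand, so the conversion drops it and uses the base pointer directly. Sketch with a hypothetical %p:

  ; typed pointers: an offset-0 GEP kept as its own SSA value
  %p0 = getelementptr inbounds x86_fp80, x86_fp80* %p, i64 0
  %v  = load x86_fp80, x86_fp80* %p0, align 16

  ; opaque pointers: the no-op GEP is gone
  %v  = load x86_fp80, ptr %p, align 16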

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll
index b46d1cc6f806b..b48480b386a3b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll
@@ -8,15 +8,15 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 @d = external global double, align 8
 
-declare %struct.GPar.0.16.26* @Rf_gpptr(...)
+declare ptr @Rf_gpptr(...)
 
 define void @Rf_GReset() {
 ; CHECK-LABEL: @Rf_GReset(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load double, double* @d, align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr @d, align 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> <double undef, double poison>, double [[TMP0]], i32 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP1]]
-; CHECK-NEXT:    br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label [[IF_THEN:%.*]], label [[IF_END7:%.*]]
+; CHECK-NEXT:    br i1 icmp eq (ptr inttoptr (i64 115 to ptr), ptr @Rf_gpptr), label [[IF_THEN:%.*]], label [[IF_END7:%.*]]
 ; CHECK:       if.then:
 ; CHECK-NEXT:    [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef
 ; CHECK-NEXT:    [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], undef
@@ -31,9 +31,9 @@ define void @Rf_GReset() {
 ;
 entry:
   %sub = fsub double -0.000000e+00, undef
-  %0 = load double, double* @d, align 8
+  %0 = load double, ptr @d, align 8
   %sub1 = fsub double -0.000000e+00, %0
-  br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label %if.then, label %if.end7
+  br i1 icmp eq (ptr inttoptr (i64 115 to ptr), ptr @Rf_gpptr), label %if.then, label %if.end7
 
 if.then:                                          ; preds = %entry
   %sub2 = fsub double %sub, undef
@@ -54,10 +54,10 @@ if.end7:                                          ; preds = %if.then6, %if.then,
 define void @Rf_GReset_unary_fneg() {
 ; CHECK-LABEL: @Rf_GReset_unary_fneg(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load double, double* @d, align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr @d, align 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> <double undef, double poison>, double [[TMP0]], i32 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = fneg <2 x double> [[TMP1]]
-; CHECK-NEXT:    br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label [[IF_THEN:%.*]], label [[IF_END7:%.*]]
+; CHECK-NEXT:    br i1 icmp eq (ptr inttoptr (i64 115 to ptr), ptr @Rf_gpptr), label [[IF_THEN:%.*]], label [[IF_END7:%.*]]
 ; CHECK:       if.then:
 ; CHECK-NEXT:    [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef
 ; CHECK-NEXT:    [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], undef
@@ -72,9 +72,9 @@ define void @Rf_GReset_unary_fneg() {
 ;
 entry:
   %sub = fneg double undef
-  %0 = load double, double* @d, align 8
+  %0 = load double, ptr @d, align 8
   %sub1 = fneg double %0
-  br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label %if.then, label %if.end7
+  br i1 icmp eq (ptr inttoptr (i64 115 to ptr), ptr @Rf_gpptr), label %if.then, label %if.end7
 
 if.then:                                          ; preds = %entry
   %sub2 = fsub double %sub, undef

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll
index b67fab54967d7..c758d05cab800 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll
@@ -3,7 +3,7 @@
 
 target datalayout = "f64:64:64-v64:64:64"
 
-define void @test_phi_in_landingpad() personality i8*
+define void @test_phi_in_landingpad() personality ptr
 ; CHECK-LABEL: @test_phi_in_landingpad(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    invoke void @foo()
@@ -13,14 +13,14 @@ define void @test_phi_in_landingpad() personality i8*
 ; CHECK-NEXT:    to label [[DONE:%.*]] unwind label [[LPAD]]
 ; CHECK:       lpad:
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x double> [ undef, [[ENTRY:%.*]] ], [ undef, [[INNER]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = landingpad { i8*, i32 }
-; CHECK-NEXT:    catch i8* null
+; CHECK-NEXT:    [[TMP1:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT:    catch ptr null
 ; CHECK-NEXT:    br label [[DONE]]
 ; CHECK:       done:
 ; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x double> [ undef, [[INNER]] ], [ [[TMP0]], [[LPAD]] ]
 ; CHECK-NEXT:    ret void
 ;
-  bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+  @__gxx_personality_v0 {
 entry:
   invoke void @foo()
   to label %inner unwind label %lpad
@@ -34,7 +34,7 @@ inner:
 lpad:
   %x1 = phi double [ undef, %entry ], [ undef, %inner ]
   %y1 = phi double [ undef, %entry ], [ undef, %inner ]
-  landingpad { i8*, i32 } catch i8* null
+  landingpad { ptr, i32 } catch ptr null
   br label %done
 
 done:
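The landingpad updates above are the same rule applied to aggregate and personality types: i8* inside { i8*, i32 } becomes ptr, and the personality reference loses its constant bitcast since every pointer already has the one type. A self-contained sketch; @f, @g, and the labels are hypothetical:

  declare i32 @__gxx_personality_v0(...)
  declare void @g()

  define void @f() personality ptr @__gxx_personality_v0 {
  entry:
    invoke void @g() to label %ok unwind label %lpad
  lpad:
    %l = landingpad { ptr, i32 }
            catch ptr null
    resume { ptr, i32 } %l
  ok:
    ret void
  }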

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll
index b4e29533fb022..be817ef714171 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll
@@ -6,53 +6,51 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 target triple = "i386-apple-macosx10.9.0"
 
 
-define void @test(double* %i1, double* %i2, double* %o) {
+define void @test(ptr %i1, ptr %i2, ptr %o) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[I1_GEP1:%.*]] = getelementptr double, double* [[I1:%.*]], i64 1
-; CHECK-NEXT:    [[I1_0:%.*]] = load double, double* [[I1]], align 16
-; CHECK-NEXT:    [[I1_1:%.*]] = load double, double* [[I1_GEP1]], align 16
+; CHECK-NEXT:    [[I1_GEP1:%.*]] = getelementptr double, ptr [[I1:%.*]], i64 1
+; CHECK-NEXT:    [[I1_0:%.*]] = load double, ptr [[I1]], align 16
+; CHECK-NEXT:    [[I1_1:%.*]] = load double, ptr [[I1_GEP1]], align 16
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[I1_0]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[I1_1]], i32 1
 ; CHECK-NEXT:    br i1 undef, label [[THEN:%.*]], label [[END:%.*]]
 ; CHECK:       then:
-; CHECK-NEXT:    [[I2_GEP0:%.*]] = getelementptr inbounds double, double* [[I2:%.*]], i64 0
-; CHECK-NEXT:    [[I2_GEP1:%.*]] = getelementptr inbounds double, double* [[I2]], i64 1
-; CHECK-NEXT:    [[I2_0:%.*]] = load double, double* [[I2_GEP0]], align 16
-; CHECK-NEXT:    [[I2_1:%.*]] = load double, double* [[I2_GEP1]], align 16
+; CHECK-NEXT:    [[I2_GEP1:%.*]] = getelementptr inbounds double, ptr [[I2:%.*]], i64 1
+; CHECK-NEXT:    [[I2_0:%.*]] = load double, ptr [[I2]], align 16
+; CHECK-NEXT:    [[I2_1:%.*]] = load double, ptr [[I2_GEP1]], align 16
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[I2_0]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[I2_1]], i32 1
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
 ; CHECK-NEXT:    [[TMP4:%.*]] = phi <2 x double> [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP3]], [[THEN]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
-; CHECK-NEXT:    store double [[TMP5]], double* [[O:%.*]], align 16
-; CHECK-NEXT:    [[O_GEP1:%.*]] = getelementptr inbounds double, double* [[O]], i64 1
+; CHECK-NEXT:    store double [[TMP5]], ptr [[O:%.*]], align 16
+; CHECK-NEXT:    [[O_GEP1:%.*]] = getelementptr inbounds double, ptr [[O]], i64 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
-; CHECK-NEXT:    store double [[TMP6]], double* [[O_GEP1]], align 16
+; CHECK-NEXT:    store double [[TMP6]], ptr [[O_GEP1]], align 16
 ; CHECK-NEXT:    ret void
 ;
 ; Test that we correctly recognize the discontiguous memory in arrays where the
 ; size is less than the alignment, and through various different GEP formations.
 
 entry:
-  %i1.0 = load double, double* %i1, align 16
-  %i1.gep1 = getelementptr double, double* %i1, i64 1
-  %i1.1 = load double, double* %i1.gep1, align 16
+  %i1.0 = load double, ptr %i1, align 16
+  %i1.gep1 = getelementptr double, ptr %i1, i64 1
+  %i1.1 = load double, ptr %i1.gep1, align 16
   br i1 undef, label %then, label %end
 
 then:
-  %i2.gep0 = getelementptr inbounds double, double* %i2, i64 0
-  %i2.0 = load double, double* %i2.gep0, align 16
-  %i2.gep1 = getelementptr inbounds double, double* %i2, i64 1
-  %i2.1 = load double, double* %i2.gep1, align 16
+  %i2.0 = load double, ptr %i2, align 16
+  %i2.gep1 = getelementptr inbounds double, ptr %i2, i64 1
+  %i2.1 = load double, ptr %i2.gep1, align 16
   br label %end
 
 end:
   %phi0 = phi double [ %i1.0, %entry ], [ %i2.0, %then ]
   %phi1 = phi double [ %i1.1, %entry ], [ %i2.1, %then ]
-  store double %phi0, double* %o, align 16
-  %o.gep1 = getelementptr inbounds double, double* %o, i64 1
-  store double %phi1, double* %o.gep1, align 16
+  store double %phi0, ptr %o, align 16
+  %o.gep1 = getelementptr inbounds double, ptr %o, i64 1
+  store double %phi1, ptr %o.gep1, align 16
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll b/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll
index 112cb73961e92..c61d1fbc39479 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll
@@ -3,128 +3,122 @@
 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx  | FileCheck %s --check-prefixes=CHECK,AVX
 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
 
-define void @powof2div_uniform(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c){
+define void @powof2div_uniform(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c){
 ; CHECK-LABEL: @powof2div_uniform(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[C:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = sdiv <4 x i32> [[TMP4]], <i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load i32, i32* %b, align 4
-  %1 = load i32, i32* %c, align 4
+  %0 = load i32, ptr %b, align 4
+  %1 = load i32, ptr %c, align 4
   %add = add nsw i32 %1, %0
   %div = sdiv i32 %add, 2
-  store i32 %div, i32* %a, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 1
-  %2 = load i32, i32* %arrayidx3, align 4
-  %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 1
-  %3 = load i32, i32* %arrayidx4, align 4
+  store i32 %div, ptr %a, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %b, i64 1
+  %2 = load i32, ptr %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %c, i64 1
+  %3 = load i32, ptr %arrayidx4, align 4
   %add5 = add nsw i32 %3, %2
   %div6 = sdiv i32 %add5, 2
-  %arrayidx7 = getelementptr inbounds i32, i32* %a, i64 1
-  store i32 %div6, i32* %arrayidx7, align 4
-  %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 2
-  %4 = load i32, i32* %arrayidx8, align 4
-  %arrayidx9 = getelementptr inbounds i32, i32* %c, i64 2
-  %5 = load i32, i32* %arrayidx9, align 4
+  %arrayidx7 = getelementptr inbounds i32, ptr %a, i64 1
+  store i32 %div6, ptr %arrayidx7, align 4
+  %arrayidx8 = getelementptr inbounds i32, ptr %b, i64 2
+  %4 = load i32, ptr %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %c, i64 2
+  %5 = load i32, ptr %arrayidx9, align 4
   %add10 = add nsw i32 %5, %4
   %div11 = sdiv i32 %add10, 2
-  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 2
-  store i32 %div11, i32* %arrayidx12, align 4
-  %arrayidx13 = getelementptr inbounds i32, i32* %b, i64 3
-  %6 = load i32, i32* %arrayidx13, align 4
-  %arrayidx14 = getelementptr inbounds i32, i32* %c, i64 3
-  %7 = load i32, i32* %arrayidx14, align 4
+  %arrayidx12 = getelementptr inbounds i32, ptr %a, i64 2
+  store i32 %div11, ptr %arrayidx12, align 4
+  %arrayidx13 = getelementptr inbounds i32, ptr %b, i64 3
+  %6 = load i32, ptr %arrayidx13, align 4
+  %arrayidx14 = getelementptr inbounds i32, ptr %c, i64 3
+  %7 = load i32, ptr %arrayidx14, align 4
   %add15 = add nsw i32 %7, %6
   %div16 = sdiv i32 %add15, 2
-  %arrayidx17 = getelementptr inbounds i32, i32* %a, i64 3
-  store i32 %div16, i32* %arrayidx17, align 4
+  %arrayidx17 = getelementptr inbounds i32, ptr %a, i64 3
+  store i32 %div16, ptr %arrayidx17, align 4
   ret void
 }
 
-define void @powof2div_nonuniform(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c){
+define void @powof2div_nonuniform(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c){
 ; SSE-LABEL: @powof2div_nonuniform(
 ; SSE-NEXT:  entry:
-; SSE-NEXT:    [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
-; SSE-NEXT:    [[TMP1:%.*]] = load i32, i32* [[C:%.*]], align 4
+; SSE-NEXT:    [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load i32, ptr [[C:%.*]], align 4
 ; SSE-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
 ; SSE-NEXT:    [[DIV:%.*]] = sdiv i32 [[ADD]], 2
-; SSE-NEXT:    store i32 [[DIV]], i32* [[A:%.*]], align 4
-; SSE-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 1
-; SSE-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
-; SSE-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 1
-; SSE-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX4]], align 4
+; SSE-NEXT:    store i32 [[DIV]], ptr [[A:%.*]], align 4
+; SSE-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 1
+; SSE-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
+; SSE-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 1
+; SSE-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
 ; SSE-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP3]], [[TMP2]]
 ; SSE-NEXT:    [[DIV6:%.*]] = sdiv i32 [[ADD5]], 4
-; SSE-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1
-; SSE-NEXT:    store i32 [[DIV6]], i32* [[ARRAYIDX7]], align 4
-; SSE-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
-; SSE-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4
-; SSE-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 2
-; SSE-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4
+; SSE-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 1
+; SSE-NEXT:    store i32 [[DIV6]], ptr [[ARRAYIDX7]], align 4
+; SSE-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 2
+; SSE-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4
+; SSE-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 2
+; SSE-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX9]], align 4
 ; SSE-NEXT:    [[ADD10:%.*]] = add nsw i32 [[TMP5]], [[TMP4]]
 ; SSE-NEXT:    [[DIV11:%.*]] = sdiv i32 [[ADD10]], 8
-; SSE-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
-; SSE-NEXT:    store i32 [[DIV11]], i32* [[ARRAYIDX12]], align 4
-; SSE-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
-; SSE-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX13]], align 4
-; SSE-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 3
-; SSE-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4
+; SSE-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 2
+; SSE-NEXT:    store i32 [[DIV11]], ptr [[ARRAYIDX12]], align 4
+; SSE-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 3
+; SSE-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX13]], align 4
+; SSE-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 3
+; SSE-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX14]], align 4
 ; SSE-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP7]], [[TMP6]]
 ; SSE-NEXT:    [[DIV16:%.*]] = sdiv i32 [[ADD15]], 16
-; SSE-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
-; SSE-NEXT:    store i32 [[DIV16]], i32* [[ARRAYIDX17]], align 4
+; SSE-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 3
+; SSE-NEXT:    store i32 [[DIV16]], ptr [[ARRAYIDX17]], align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @powof2div_nonuniform(
 ; AVX-NEXT:  entry:
-; AVX-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; AVX-NEXT:    [[TMP2:%.*]] = bitcast i32* [[C:%.*]] to <4 x i32>*
-; AVX-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
+; AVX-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[C:%.*]], align 4
 ; AVX-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]]
 ; AVX-NEXT:    [[TMP5:%.*]] = sdiv <4 x i32> [[TMP4]], <i32 2, i32 4, i32 8, i32 16>
-; AVX-NEXT:    [[TMP6:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
-; AVX-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
+; AVX-NEXT:    store <4 x i32> [[TMP5]], ptr [[A:%.*]], align 4
 ; AVX-NEXT:    ret void
 ;
 entry:
-  %0 = load i32, i32* %b, align 4
-  %1 = load i32, i32* %c, align 4
+  %0 = load i32, ptr %b, align 4
+  %1 = load i32, ptr %c, align 4
   %add = add nsw i32 %1, %0
   %div = sdiv i32 %add, 2
-  store i32 %div, i32* %a, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 1
-  %2 = load i32, i32* %arrayidx3, align 4
-  %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 1
-  %3 = load i32, i32* %arrayidx4, align 4
+  store i32 %div, ptr %a, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %b, i64 1
+  %2 = load i32, ptr %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %c, i64 1
+  %3 = load i32, ptr %arrayidx4, align 4
   %add5 = add nsw i32 %3, %2
   %div6 = sdiv i32 %add5, 4
-  %arrayidx7 = getelementptr inbounds i32, i32* %a, i64 1
-  store i32 %div6, i32* %arrayidx7, align 4
-  %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 2
-  %4 = load i32, i32* %arrayidx8, align 4
-  %arrayidx9 = getelementptr inbounds i32, i32* %c, i64 2
-  %5 = load i32, i32* %arrayidx9, align 4
+  %arrayidx7 = getelementptr inbounds i32, ptr %a, i64 1
+  store i32 %div6, ptr %arrayidx7, align 4
+  %arrayidx8 = getelementptr inbounds i32, ptr %b, i64 2
+  %4 = load i32, ptr %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %c, i64 2
+  %5 = load i32, ptr %arrayidx9, align 4
   %add10 = add nsw i32 %5, %4
   %div11 = sdiv i32 %add10, 8
-  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 2
-  store i32 %div11, i32* %arrayidx12, align 4
-  %arrayidx13 = getelementptr inbounds i32, i32* %b, i64 3
-  %6 = load i32, i32* %arrayidx13, align 4
-  %arrayidx14 = getelementptr inbounds i32, i32* %c, i64 3
-  %7 = load i32, i32* %arrayidx14, align 4
+  %arrayidx12 = getelementptr inbounds i32, ptr %a, i64 2
+  store i32 %div11, ptr %arrayidx12, align 4
+  %arrayidx13 = getelementptr inbounds i32, ptr %b, i64 3
+  %6 = load i32, ptr %arrayidx13, align 4
+  %arrayidx14 = getelementptr inbounds i32, ptr %c, i64 3
+  %7 = load i32, ptr %arrayidx14, align 4
   %add15 = add nsw i32 %7, %6
   %div16 = sdiv i32 %add15, 16
-  %arrayidx17 = getelementptr inbounds i32, i32* %a, i64 3
-  store i32 %div16, i32* %arrayidx17, align 4
+  %arrayidx17 = getelementptr inbounds i32, ptr %a, i64 3
+  store i32 %div16, ptr %arrayidx17, align 4
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll b/llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll
index f8c58afd33afb..d8e646b544179 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll
@@ -3,255 +3,231 @@
 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx  | FileCheck %s --check-prefixes=CHECK,AVX
 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
 
-define void @powof2mul_uniform(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c){
+define void @powof2mul_uniform(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c){
 ; CHECK-LABEL: @powof2mul_uniform(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[C:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul <4 x i32> [[TMP4]], <i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load i32, i32* %b, align 4
-  %1 = load i32, i32* %c, align 4
+  %0 = load i32, ptr %b, align 4
+  %1 = load i32, ptr %c, align 4
   %add = add nsw i32 %1, %0
   %mul = mul i32 %add, 2
-  store i32 %mul, i32* %a, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 1
-  %2 = load i32, i32* %arrayidx3, align 4
-  %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 1
-  %3 = load i32, i32* %arrayidx4, align 4
+  store i32 %mul, ptr %a, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %b, i64 1
+  %2 = load i32, ptr %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %c, i64 1
+  %3 = load i32, ptr %arrayidx4, align 4
   %add5 = add nsw i32 %3, %2
   %mul6 = mul i32 %add5, 2
-  %arrayidx7 = getelementptr inbounds i32, i32* %a, i64 1
-  store i32 %mul6, i32* %arrayidx7, align 4
-  %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 2
-  %4 = load i32, i32* %arrayidx8, align 4
-  %arrayidx9 = getelementptr inbounds i32, i32* %c, i64 2
-  %5 = load i32, i32* %arrayidx9, align 4
+  %arrayidx7 = getelementptr inbounds i32, ptr %a, i64 1
+  store i32 %mul6, ptr %arrayidx7, align 4
+  %arrayidx8 = getelementptr inbounds i32, ptr %b, i64 2
+  %4 = load i32, ptr %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %c, i64 2
+  %5 = load i32, ptr %arrayidx9, align 4
   %add10 = add nsw i32 %5, %4
   %mul11 = mul i32 %add10, 2
-  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 2
-  store i32 %mul11, i32* %arrayidx12, align 4
-  %arrayidx13 = getelementptr inbounds i32, i32* %b, i64 3
-  %6 = load i32, i32* %arrayidx13, align 4
-  %arrayidx14 = getelementptr inbounds i32, i32* %c, i64 3
-  %7 = load i32, i32* %arrayidx14, align 4
+  %arrayidx12 = getelementptr inbounds i32, ptr %a, i64 2
+  store i32 %mul11, ptr %arrayidx12, align 4
+  %arrayidx13 = getelementptr inbounds i32, ptr %b, i64 3
+  %6 = load i32, ptr %arrayidx13, align 4
+  %arrayidx14 = getelementptr inbounds i32, ptr %c, i64 3
+  %7 = load i32, ptr %arrayidx14, align 4
   %add15 = add nsw i32 %7, %6
   %mul16 = mul i32 %add15, 2
-  %arrayidx17 = getelementptr inbounds i32, i32* %a, i64 3
-  store i32 %mul16, i32* %arrayidx17, align 4
+  %arrayidx17 = getelementptr inbounds i32, ptr %a, i64 3
+  store i32 %mul16, ptr %arrayidx17, align 4
   ret void
 }
 
-define void @negpowof2mul_uniform(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c){
+define void @negpowof2mul_uniform(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c){
 ; CHECK-LABEL: @negpowof2mul_uniform(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[C:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul <4 x i32> [[TMP4]], <i32 -2, i32 -2, i32 -2, i32 -2>
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load i32, i32* %b, align 4
-  %1 = load i32, i32* %c, align 4
+  %0 = load i32, ptr %b, align 4
+  %1 = load i32, ptr %c, align 4
   %add = add nsw i32 %1, %0
   %mul = mul i32 %add, -2
-  store i32 %mul, i32* %a, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 1
-  %2 = load i32, i32* %arrayidx3, align 4
-  %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 1
-  %3 = load i32, i32* %arrayidx4, align 4
+  store i32 %mul, ptr %a, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %b, i64 1
+  %2 = load i32, ptr %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %c, i64 1
+  %3 = load i32, ptr %arrayidx4, align 4
   %add5 = add nsw i32 %3, %2
   %mul6 = mul i32 %add5, -2
-  %arrayidx7 = getelementptr inbounds i32, i32* %a, i64 1
-  store i32 %mul6, i32* %arrayidx7, align 4
-  %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 2
-  %4 = load i32, i32* %arrayidx8, align 4
-  %arrayidx9 = getelementptr inbounds i32, i32* %c, i64 2
-  %5 = load i32, i32* %arrayidx9, align 4
+  %arrayidx7 = getelementptr inbounds i32, ptr %a, i64 1
+  store i32 %mul6, ptr %arrayidx7, align 4
+  %arrayidx8 = getelementptr inbounds i32, ptr %b, i64 2
+  %4 = load i32, ptr %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %c, i64 2
+  %5 = load i32, ptr %arrayidx9, align 4
   %add10 = add nsw i32 %5, %4
   %mul11 = mul i32 %add10, -2
-  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 2
-  store i32 %mul11, i32* %arrayidx12, align 4
-  %arrayidx13 = getelementptr inbounds i32, i32* %b, i64 3
-  %6 = load i32, i32* %arrayidx13, align 4
-  %arrayidx14 = getelementptr inbounds i32, i32* %c, i64 3
-  %7 = load i32, i32* %arrayidx14, align 4
+  %arrayidx12 = getelementptr inbounds i32, ptr %a, i64 2
+  store i32 %mul11, ptr %arrayidx12, align 4
+  %arrayidx13 = getelementptr inbounds i32, ptr %b, i64 3
+  %6 = load i32, ptr %arrayidx13, align 4
+  %arrayidx14 = getelementptr inbounds i32, ptr %c, i64 3
+  %7 = load i32, ptr %arrayidx14, align 4
   %add15 = add nsw i32 %7, %6
   %mul16 = mul i32 %add15, -2
-  %arrayidx17 = getelementptr inbounds i32, i32* %a, i64 3
-  store i32 %mul16, i32* %arrayidx17, align 4
+  %arrayidx17 = getelementptr inbounds i32, ptr %a, i64 3
+  store i32 %mul16, ptr %arrayidx17, align 4
   ret void
 }
 
-define void @powof2mul_nonuniform(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c){
+define void @powof2mul_nonuniform(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c){
 ; CHECK-LABEL: @powof2mul_nonuniform(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[C:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul <4 x i32> [[TMP4]], <i32 2, i32 4, i32 8, i32 16>
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load i32, i32* %b, align 4
-  %1 = load i32, i32* %c, align 4
+  %0 = load i32, ptr %b, align 4
+  %1 = load i32, ptr %c, align 4
   %add = add nsw i32 %1, %0
   %mul = mul i32 %add, 2
-  store i32 %mul, i32* %a, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 1
-  %2 = load i32, i32* %arrayidx3, align 4
-  %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 1
-  %3 = load i32, i32* %arrayidx4, align 4
+  store i32 %mul, ptr %a, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %b, i64 1
+  %2 = load i32, ptr %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %c, i64 1
+  %3 = load i32, ptr %arrayidx4, align 4
   %add5 = add nsw i32 %3, %2
   %mul6 = mul i32 %add5, 4
-  %arrayidx7 = getelementptr inbounds i32, i32* %a, i64 1
-  store i32 %mul6, i32* %arrayidx7, align 4
-  %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 2
-  %4 = load i32, i32* %arrayidx8, align 4
-  %arrayidx9 = getelementptr inbounds i32, i32* %c, i64 2
-  %5 = load i32, i32* %arrayidx9, align 4
+  %arrayidx7 = getelementptr inbounds i32, ptr %a, i64 1
+  store i32 %mul6, ptr %arrayidx7, align 4
+  %arrayidx8 = getelementptr inbounds i32, ptr %b, i64 2
+  %4 = load i32, ptr %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %c, i64 2
+  %5 = load i32, ptr %arrayidx9, align 4
   %add10 = add nsw i32 %5, %4
   %mul11 = mul i32 %add10, 8
-  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 2
-  store i32 %mul11, i32* %arrayidx12, align 4
-  %arrayidx13 = getelementptr inbounds i32, i32* %b, i64 3
-  %6 = load i32, i32* %arrayidx13, align 4
-  %arrayidx14 = getelementptr inbounds i32, i32* %c, i64 3
-  %7 = load i32, i32* %arrayidx14, align 4
+  %arrayidx12 = getelementptr inbounds i32, ptr %a, i64 2
+  store i32 %mul11, ptr %arrayidx12, align 4
+  %arrayidx13 = getelementptr inbounds i32, ptr %b, i64 3
+  %6 = load i32, ptr %arrayidx13, align 4
+  %arrayidx14 = getelementptr inbounds i32, ptr %c, i64 3
+  %7 = load i32, ptr %arrayidx14, align 4
   %add15 = add nsw i32 %7, %6
   %mul16 = mul i32 %add15, 16
-  %arrayidx17 = getelementptr inbounds i32, i32* %a, i64 3
-  store i32 %mul16, i32* %arrayidx17, align 4
+  %arrayidx17 = getelementptr inbounds i32, ptr %a, i64 3
+  store i32 %mul16, ptr %arrayidx17, align 4
   ret void
 }
 
-define void @negpowof2mul_nonuniform(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c){
+define void @negpowof2mul_nonuniform(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c){
 ; CHECK-LABEL: @negpowof2mul_nonuniform(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[C:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul <4 x i32> [[TMP4]], <i32 -2, i32 -4, i32 -8, i32 -16>
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load i32, i32* %b, align 4
-  %1 = load i32, i32* %c, align 4
+  %0 = load i32, ptr %b, align 4
+  %1 = load i32, ptr %c, align 4
   %add = add nsw i32 %1, %0
   %mul = mul i32 %add, -2
-  store i32 %mul, i32* %a, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 1
-  %2 = load i32, i32* %arrayidx3, align 4
-  %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 1
-  %3 = load i32, i32* %arrayidx4, align 4
+  store i32 %mul, ptr %a, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %b, i64 1
+  %2 = load i32, ptr %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %c, i64 1
+  %3 = load i32, ptr %arrayidx4, align 4
   %add5 = add nsw i32 %3, %2
   %mul6 = mul i32 %add5, -4
-  %arrayidx7 = getelementptr inbounds i32, i32* %a, i64 1
-  store i32 %mul6, i32* %arrayidx7, align 4
-  %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 2
-  %4 = load i32, i32* %arrayidx8, align 4
-  %arrayidx9 = getelementptr inbounds i32, i32* %c, i64 2
-  %5 = load i32, i32* %arrayidx9, align 4
+  %arrayidx7 = getelementptr inbounds i32, ptr %a, i64 1
+  store i32 %mul6, ptr %arrayidx7, align 4
+  %arrayidx8 = getelementptr inbounds i32, ptr %b, i64 2
+  %4 = load i32, ptr %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %c, i64 2
+  %5 = load i32, ptr %arrayidx9, align 4
   %add10 = add nsw i32 %5, %4
   %mul11 = mul i32 %add10, -8
-  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 2
-  store i32 %mul11, i32* %arrayidx12, align 4
-  %arrayidx13 = getelementptr inbounds i32, i32* %b, i64 3
-  %6 = load i32, i32* %arrayidx13, align 4
-  %arrayidx14 = getelementptr inbounds i32, i32* %c, i64 3
-  %7 = load i32, i32* %arrayidx14, align 4
+  %arrayidx12 = getelementptr inbounds i32, ptr %a, i64 2
+  store i32 %mul11, ptr %arrayidx12, align 4
+  %arrayidx13 = getelementptr inbounds i32, ptr %b, i64 3
+  %6 = load i32, ptr %arrayidx13, align 4
+  %arrayidx14 = getelementptr inbounds i32, ptr %c, i64 3
+  %7 = load i32, ptr %arrayidx14, align 4
   %add15 = add nsw i32 %7, %6
   %mul16 = mul i32 %add15, -16
-  %arrayidx17 = getelementptr inbounds i32, i32* %a, i64 3
-  store i32 %mul16, i32* %arrayidx17, align 4
+  %arrayidx17 = getelementptr inbounds i32, ptr %a, i64 3
+  store i32 %mul16, ptr %arrayidx17, align 4
   ret void
 }
 
-define void @PR51436(i64* nocapture %a) {
+define void @PR51436(ptr nocapture %a) {
 ; SSE-LABEL: @PR51436(
 ; SSE-NEXT:  entry:
-; SSE-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 2
-; SSE-NEXT:    [[GEP4:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 4
-; SSE-NEXT:    [[GEP6:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 6
-; SSE-NEXT:    [[TMP0:%.*]] = bitcast i64* [[A]] to <2 x i64>*
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
+; SSE-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 2
+; SSE-NEXT:    [[GEP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 4
+; SSE-NEXT:    [[GEP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 6
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 8
 ; SSE-NEXT:    [[TMP2:%.*]] = mul <2 x i64> [[TMP1]], <i64 -17592186044416, i64 -17592186044416>
 ; SSE-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[TMP2]], <i64 -17592186044416, i64 -17592186044416>
-; SSE-NEXT:    [[TMP4:%.*]] = bitcast i64* [[A]] to <2 x i64>*
-; SSE-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 8
-; SSE-NEXT:    [[TMP5:%.*]] = bitcast i64* [[GEP2]] to <2 x i64>*
-; SSE-NEXT:    [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP5]], align 8
+; SSE-NEXT:    store <2 x i64> [[TMP3]], ptr [[A]], align 8
+; SSE-NEXT:    [[TMP6:%.*]] = load <2 x i64>, ptr [[GEP2]], align 8
 ; SSE-NEXT:    [[TMP7:%.*]] = mul <2 x i64> [[TMP6]], <i64 -17592186044416, i64 -17592186044416>
 ; SSE-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[TMP7]], <i64 -17592186044416, i64 -17592186044416>
-; SSE-NEXT:    [[TMP9:%.*]] = bitcast i64* [[GEP2]] to <2 x i64>*
-; SSE-NEXT:    store <2 x i64> [[TMP8]], <2 x i64>* [[TMP9]], align 8
-; SSE-NEXT:    [[TMP10:%.*]] = bitcast i64* [[GEP4]] to <2 x i64>*
-; SSE-NEXT:    [[TMP11:%.*]] = load <2 x i64>, <2 x i64>* [[TMP10]], align 8
+; SSE-NEXT:    store <2 x i64> [[TMP8]], ptr [[GEP2]], align 8
+; SSE-NEXT:    [[TMP11:%.*]] = load <2 x i64>, ptr [[GEP4]], align 8
 ; SSE-NEXT:    [[TMP12:%.*]] = mul <2 x i64> [[TMP11]], <i64 -17592186044416, i64 -17592186044416>
 ; SSE-NEXT:    [[TMP13:%.*]] = add <2 x i64> [[TMP12]], <i64 -17592186044416, i64 -17592186044416>
-; SSE-NEXT:    [[TMP14:%.*]] = bitcast i64* [[GEP4]] to <2 x i64>*
-; SSE-NEXT:    store <2 x i64> [[TMP13]], <2 x i64>* [[TMP14]], align 8
-; SSE-NEXT:    [[TMP15:%.*]] = bitcast i64* [[GEP6]] to <2 x i64>*
-; SSE-NEXT:    [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[TMP15]], align 8
+; SSE-NEXT:    store <2 x i64> [[TMP13]], ptr [[GEP4]], align 8
+; SSE-NEXT:    [[TMP16:%.*]] = load <2 x i64>, ptr [[GEP6]], align 8
 ; SSE-NEXT:    [[TMP17:%.*]] = mul <2 x i64> [[TMP16]], <i64 -17592186044416, i64 -17592186044416>
 ; SSE-NEXT:    [[TMP18:%.*]] = add <2 x i64> [[TMP17]], <i64 -17592186044416, i64 -17592186044416>
-; SSE-NEXT:    [[TMP19:%.*]] = bitcast i64* [[GEP6]] to <2 x i64>*
-; SSE-NEXT:    store <2 x i64> [[TMP18]], <2 x i64>* [[TMP19]], align 8
+; SSE-NEXT:    store <2 x i64> [[TMP18]], ptr [[GEP6]], align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @PR51436(
 ; AVX-NEXT:  entry:
-; AVX-NEXT:    [[GEP4:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 4
-; AVX-NEXT:    [[TMP0:%.*]] = bitcast i64* [[A]] to <4 x i64>*
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* [[TMP0]], align 8
+; AVX-NEXT:    [[GEP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr [[A]], align 8
 ; AVX-NEXT:    [[TMP2:%.*]] = mul <4 x i64> [[TMP1]], <i64 -17592186044416, i64 -17592186044416, i64 -17592186044416, i64 -17592186044416>
 ; AVX-NEXT:    [[TMP3:%.*]] = add <4 x i64> [[TMP2]], <i64 -17592186044416, i64 -17592186044416, i64 -17592186044416, i64 -17592186044416>
-; AVX-NEXT:    [[TMP4:%.*]] = bitcast i64* [[A]] to <4 x i64>*
-; AVX-NEXT:    store <4 x i64> [[TMP3]], <4 x i64>* [[TMP4]], align 8
-; AVX-NEXT:    [[TMP5:%.*]] = bitcast i64* [[GEP4]] to <4 x i64>*
-; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i64>, <4 x i64>* [[TMP5]], align 8
+; AVX-NEXT:    store <4 x i64> [[TMP3]], ptr [[A]], align 8
+; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i64>, ptr [[GEP4]], align 8
 ; AVX-NEXT:    [[TMP7:%.*]] = mul <4 x i64> [[TMP6]], <i64 -17592186044416, i64 -17592186044416, i64 -17592186044416, i64 -17592186044416>
 ; AVX-NEXT:    [[TMP8:%.*]] = add <4 x i64> [[TMP7]], <i64 -17592186044416, i64 -17592186044416, i64 -17592186044416, i64 -17592186044416>
-; AVX-NEXT:    [[TMP9:%.*]] = bitcast i64* [[GEP4]] to <4 x i64>*
-; AVX-NEXT:    store <4 x i64> [[TMP8]], <4 x i64>* [[TMP9]], align 8
+; AVX-NEXT:    store <4 x i64> [[TMP8]], ptr [[GEP4]], align 8
 ; AVX-NEXT:    ret void
 ;
 entry:
-  %gep1 = getelementptr inbounds i64, i64* %a, i64 1
-  %gep2 = getelementptr inbounds i64, i64* %a, i64 2
-  %gep3 = getelementptr inbounds i64, i64* %a, i64 3
-  %gep4 = getelementptr inbounds i64, i64* %a, i64 4
-  %gep5 = getelementptr inbounds i64, i64* %a, i64 5
-  %gep6 = getelementptr inbounds i64, i64* %a, i64 6
-  %gep7 = getelementptr inbounds i64, i64* %a, i64 7
-  %load0 = load i64, i64* %a, align 8
-  %load1 = load i64, i64* %gep1, align 8
-  %load2 = load i64, i64* %gep2, align 8
-  %load3 = load i64, i64* %gep3, align 8
-  %load4 = load i64, i64* %gep4, align 8
-  %load5 = load i64, i64* %gep5, align 8
-  %load6 = load i64, i64* %gep6, align 8
-  %load7 = load i64, i64* %gep7, align 8
+  %gep1 = getelementptr inbounds i64, ptr %a, i64 1
+  %gep2 = getelementptr inbounds i64, ptr %a, i64 2
+  %gep3 = getelementptr inbounds i64, ptr %a, i64 3
+  %gep4 = getelementptr inbounds i64, ptr %a, i64 4
+  %gep5 = getelementptr inbounds i64, ptr %a, i64 5
+  %gep6 = getelementptr inbounds i64, ptr %a, i64 6
+  %gep7 = getelementptr inbounds i64, ptr %a, i64 7
+  %load0 = load i64, ptr %a, align 8
+  %load1 = load i64, ptr %gep1, align 8
+  %load2 = load i64, ptr %gep2, align 8
+  %load3 = load i64, ptr %gep3, align 8
+  %load4 = load i64, ptr %gep4, align 8
+  %load5 = load i64, ptr %gep5, align 8
+  %load6 = load i64, ptr %gep6, align 8
+  %load7 = load i64, ptr %gep7, align 8
   %mul0 = mul i64 %load0, -17592186044416
   %mul1 = mul i64 %load1, -17592186044416
   %mul2 = mul i64 %load2, -17592186044416
@@ -268,13 +244,13 @@ entry:
   %add5 = add i64 %mul5, -17592186044416
   %add6 = add i64 %mul6, -17592186044416
   %add7 = add i64 %mul7, -17592186044416
-  store i64 %add0, i64* %a, align 8
-  store i64 %add1, i64* %gep1, align 8
-  store i64 %add2, i64* %gep2, align 8
-  store i64 %add3, i64* %gep3, align 8
-  store i64 %add4, i64* %gep4, align 8
-  store i64 %add5, i64* %gep5, align 8
-  store i64 %add6, i64* %gep6, align 8
-  store i64 %add7, i64* %gep7, align 8
+  store i64 %add0, ptr %a, align 8
+  store i64 %add1, ptr %gep1, align 8
+  store i64 %add2, ptr %gep2, align 8
+  store i64 %add3, ptr %gep3, align 8
+  store i64 %add4, ptr %gep4, align 8
+  store i64 %add5, ptr %gep5, align 8
+  store i64 %add6, ptr %gep6, align 8
+  store i64 %add7, ptr %gep7, align 8
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr16628.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr16628.ll
index f998cc62eae4a..bc1ea967f5943 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr16628.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr16628.ll
@@ -13,30 +13,30 @@ define void @f() {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 (...) @g()
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @c, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @c, align 4
 ; CHECK-NEXT:    [[LNOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    [[LNOT_EXT:%.*]] = zext i1 [[LNOT]] to i32
-; CHECK-NEXT:    [[TMP1:%.*]] = load i16, i16* @a, align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr @a, align 2
 ; CHECK-NEXT:    [[LNOT2:%.*]] = icmp eq i16 [[TMP1]], 0
 ; CHECK-NEXT:    [[LNOT_EXT3:%.*]] = zext i1 [[LNOT2]] to i32
 ; CHECK-NEXT:    [[OR:%.*]] = or i32 [[LNOT_EXT3]], [[LNOT_EXT]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], [[OR]]
 ; CHECK-NEXT:    [[CONV4:%.*]] = zext i1 [[CMP]] to i16
-; CHECK-NEXT:    store i16 [[CONV4]], i16* @b, align 2
+; CHECK-NEXT:    store i16 [[CONV4]], ptr @b, align 2
 ; CHECK-NEXT:    ret void
 ;
 entry:
   %call = tail call i32 (...) @g()
-  %0 = load i32, i32* @c, align 4
+  %0 = load i32, ptr @c, align 4
   %lnot = icmp eq i32 %0, 0
   %lnot.ext = zext i1 %lnot to i32
-  %1 = load i16, i16* @a, align 2
+  %1 = load i16, ptr @a, align 2
   %lnot2 = icmp eq i16 %1, 0
   %lnot.ext3 = zext i1 %lnot2 to i32
   %or = or i32 %lnot.ext3, %lnot.ext
   %cmp = icmp eq i32 %call, %or
   %conv4 = zext i1 %cmp to i16
-  store i16 %conv4, i16* @b, align 2
+  store i16 %conv4, ptr @b, align 2
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll
index 378300a24fa2b..ff4ef6086d42a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll
@@ -3,16 +3,16 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
 target triple = "i386--netbsd"
 
-@a = common global i32* null, align 4
+@a = common global ptr null, align 4
 
 ; Function Attrs: noreturn nounwind readonly
 define i32 @fn1() #0 {
 ; CHECK-LABEL: @fn1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** @a, align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA4:![0-9]+]]
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4, !tbaa [[TBAA4]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr @a, align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA4:![0-9]+]]
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[TBAA4]]
 ; CHECK-NEXT:    br label [[DO_BODY:%.*]]
 ; CHECK:       do.body:
 ; CHECK-NEXT:    [[C_0:%.*]] = phi i32 [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ADD2:%.*]], [[DO_BODY]] ]
@@ -22,10 +22,10 @@ define i32 @fn1() #0 {
 ; CHECK-NEXT:    br label [[DO_BODY]]
 ;
 entry:
-  %0 = load i32*, i32** @a, align 4, !tbaa !4
-  %1 = load i32, i32* %0, align 4, !tbaa !5
-  %arrayidx1 = getelementptr inbounds i32, i32* %0, i32 1
-  %2 = load i32, i32* %arrayidx1, align 4, !tbaa !5
+  %0 = load ptr, ptr @a, align 4, !tbaa !4
+  %1 = load i32, ptr %0, align 4, !tbaa !5
+  %arrayidx1 = getelementptr inbounds i32, ptr %0, i32 1
+  %2 = load i32, ptr %arrayidx1, align 4, !tbaa !5
   br label %do.body
 
 do.body:                                          ; preds = %do.body, %entry

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll
index 0af3858f7d334..1ba15a148dfec 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll
@@ -5,90 +5,80 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-define void @store_chains(double* %x) {
+define void @store_chains(ptr %x) {
 ; AVX-LABEL: @store_chains(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast double* [[X:%.*]] to <4 x double>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x double>, <4 x double>* [[TMP1]], align 8
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr [[X:%.*]], align 8
 ; AVX-NEXT:    [[TMP3:%.*]] = fadd <4 x double> [[TMP2]], [[TMP2]]
 ; AVX-NEXT:    [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], [[TMP2]]
-; AVX-NEXT:    [[TMP5:%.*]] = bitcast double* [[X]] to <4 x double>*
-; AVX-NEXT:    store <4 x double> [[TMP4]], <4 x double>* [[TMP5]], align 8
+; AVX-NEXT:    store <4 x double> [[TMP4]], ptr [[X]], align 8
 ; AVX-NEXT:    ret void
 ;
 ; MAX128-LABEL: @store_chains(
-; MAX128-NEXT:    [[TMP1:%.*]] = bitcast double* [[X:%.*]] to <2 x double>*
-; MAX128-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
+; MAX128-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
 ; MAX128-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[TMP2]]
 ; MAX128-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], [[TMP2]]
-; MAX128-NEXT:    [[TMP5:%.*]] = bitcast double* [[X]] to <2 x double>*
-; MAX128-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
-; MAX128-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, double* [[X]], i64 2
-; MAX128-NEXT:    [[TMP7:%.*]] = bitcast double* [[TMP6]] to <2 x double>*
-; MAX128-NEXT:    [[TMP8:%.*]] = load <2 x double>, <2 x double>* [[TMP7]], align 8
+; MAX128-NEXT:    store <2 x double> [[TMP4]], ptr [[X]], align 8
+; MAX128-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2
+; MAX128-NEXT:    [[TMP8:%.*]] = load <2 x double>, ptr [[TMP6]], align 8
 ; MAX128-NEXT:    [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], [[TMP8]]
 ; MAX128-NEXT:    [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], [[TMP8]]
-; MAX128-NEXT:    [[TMP11:%.*]] = bitcast double* [[TMP6]] to <2 x double>*
-; MAX128-NEXT:    store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8
+; MAX128-NEXT:    store <2 x double> [[TMP10]], ptr [[TMP6]], align 8
 ; MAX128-NEXT:    ret void
 ;
-  %1 = load double, double* %x, align 8
+  %1 = load double, ptr %x, align 8
   %2 = fadd double %1, %1
   %3 = fadd double %2, %1
-  store double %3, double* %x, align 8
-  %4 = getelementptr inbounds double, double* %x, i64 1
-  %5 = load double, double* %4, align 8
+  store double %3, ptr %x, align 8
+  %4 = getelementptr inbounds double, ptr %x, i64 1
+  %5 = load double, ptr %4, align 8
   %6 = fadd double %5, %5
   %7 = fadd double %6, %5
-  store double %7, double* %4, align 8
-  %8 = getelementptr inbounds double, double* %x, i64 2
-  %9 = load double, double* %8, align 8
+  store double %7, ptr %4, align 8
+  %8 = getelementptr inbounds double, ptr %x, i64 2
+  %9 = load double, ptr %8, align 8
   %10 = fadd double %9, %9
   %11 = fadd double %10, %9
-  store double %11, double* %8, align 8
-  %12 = getelementptr inbounds double, double* %x, i64 3
-  %13 = load double, double* %12, align 8
+  store double %11, ptr %8, align 8
+  %12 = getelementptr inbounds double, ptr %x, i64 3
+  %13 = load double, ptr %12, align 8
   %14 = fadd double %13, %13
   %15 = fadd double %14, %13
-  store double %15, double* %12, align 8
+  store double %15, ptr %12, align 8
   ret void
 }
 
-define void @store_chains_prefer_width_attr(double* %x) #0 {
+define void @store_chains_prefer_width_attr(ptr %x) #0 {
 ; ANY-LABEL: @store_chains_prefer_width_attr(
-; ANY-NEXT:    [[TMP1:%.*]] = bitcast double* [[X:%.*]] to <2 x double>*
-; ANY-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
+; ANY-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
 ; ANY-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[TMP2]]
 ; ANY-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], [[TMP2]]
-; ANY-NEXT:    [[TMP5:%.*]] = bitcast double* [[X]] to <2 x double>*
-; ANY-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
-; ANY-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, double* [[X]], i64 2
-; ANY-NEXT:    [[TMP7:%.*]] = bitcast double* [[TMP6]] to <2 x double>*
-; ANY-NEXT:    [[TMP8:%.*]] = load <2 x double>, <2 x double>* [[TMP7]], align 8
+; ANY-NEXT:    store <2 x double> [[TMP4]], ptr [[X]], align 8
+; ANY-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2
+; ANY-NEXT:    [[TMP8:%.*]] = load <2 x double>, ptr [[TMP6]], align 8
 ; ANY-NEXT:    [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], [[TMP8]]
 ; ANY-NEXT:    [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], [[TMP8]]
-; ANY-NEXT:    [[TMP11:%.*]] = bitcast double* [[TMP6]] to <2 x double>*
-; ANY-NEXT:    store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8
+; ANY-NEXT:    store <2 x double> [[TMP10]], ptr [[TMP6]], align 8
 ; ANY-NEXT:    ret void
 ;
-  %1 = load double, double* %x, align 8
+  %1 = load double, ptr %x, align 8
   %2 = fadd double %1, %1
   %3 = fadd double %2, %1
-  store double %3, double* %x, align 8
-  %4 = getelementptr inbounds double, double* %x, i64 1
-  %5 = load double, double* %4, align 8
+  store double %3, ptr %x, align 8
+  %4 = getelementptr inbounds double, ptr %x, i64 1
+  %5 = load double, ptr %4, align 8
   %6 = fadd double %5, %5
   %7 = fadd double %6, %5
-  store double %7, double* %4, align 8
-  %8 = getelementptr inbounds double, double* %x, i64 2
-  %9 = load double, double* %8, align 8
+  store double %7, ptr %4, align 8
+  %8 = getelementptr inbounds double, ptr %x, i64 2
+  %9 = load double, ptr %8, align 8
   %10 = fadd double %9, %9
   %11 = fadd double %10, %9
-  store double %11, double* %8, align 8
-  %12 = getelementptr inbounds double, double* %x, i64 3
-  %13 = load double, double* %12, align 8
+  store double %11, ptr %8, align 8
+  %12 = getelementptr inbounds double, ptr %x, i64 3
+  %13 = load double, ptr %12, align 8
   %14 = fadd double %13, %13
   %15 = fadd double %14, %13
-  store double %15, double* %12, align 8
+  store double %15, ptr %12, align 8
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr23510.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr23510.ll
index 9336f3a3bf06e..8f96968b978a5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr23510.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr23510.ll
@@ -7,52 +7,48 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @total = global i64 0, align 8
 
-define void @_Z3fooPml(i64* nocapture %a, i64 %i) {
+define void @_Z3fooPml(ptr nocapture %a, i64 %i) {
 ; CHECK-LABEL: @_Z3fooPml(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 4, i64 4>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[A]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP2]], <2 x i64>* [[TMP3]], align 8
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* [[ARRAYIDX3]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* @total, align 8
+; CHECK-NEXT:    store <2 x i64> [[TMP2]], ptr [[A]], align 8
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load i64, ptr @total, align 8
 ; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[TMP3]], [[TMP2]]
-; CHECK-NEXT:    store i64 [[ADD]], i64* @total, align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64* [[A]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[TMP4]], align 8
+; CHECK-NEXT:    store i64 [[ADD]], ptr @total, align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr [[A]], align 8
 ; CHECK-NEXT:    [[TMP6:%.*]] = lshr <2 x i64> [[TMP5]], <i64 4, i64 4>
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[A]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 8
-; CHECK-NEXT:    [[TMP6:%.*]] = load i64, i64* [[ARRAYIDX3]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = load i64, i64* @total, align 8
+; CHECK-NEXT:    store <2 x i64> [[TMP6]], ptr [[A]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = load i64, ptr @total, align 8
 ; CHECK-NEXT:    [[ADD9:%.*]] = add i64 [[TMP7]], [[TMP6]]
-; CHECK-NEXT:    store i64 [[ADD9]], i64* @total, align 8
+; CHECK-NEXT:    store i64 [[ADD9]], ptr @total, align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %tmp = load i64, i64* %a, align 8
+  %tmp = load i64, ptr %a, align 8
   %shr = lshr i64 %tmp, 4
-  store i64 %shr, i64* %a, align 8
-  %arrayidx1 = getelementptr inbounds i64, i64* %a, i64 1
-  %tmp1 = load i64, i64* %arrayidx1, align 8
+  store i64 %shr, ptr %a, align 8
+  %arrayidx1 = getelementptr inbounds i64, ptr %a, i64 1
+  %tmp1 = load i64, ptr %arrayidx1, align 8
   %shr2 = lshr i64 %tmp1, 4
-  store i64 %shr2, i64* %arrayidx1, align 8
-  %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 %i
-  %tmp2 = load i64, i64* %arrayidx3, align 8
-  %tmp3 = load i64, i64* @total, align 8
+  store i64 %shr2, ptr %arrayidx1, align 8
+  %arrayidx3 = getelementptr inbounds i64, ptr %a, i64 %i
+  %tmp2 = load i64, ptr %arrayidx3, align 8
+  %tmp3 = load i64, ptr @total, align 8
   %add = add i64 %tmp3, %tmp2
-  store i64 %add, i64* @total, align 8
-  %tmp4 = load i64, i64* %a, align 8
+  store i64 %add, ptr @total, align 8
+  %tmp4 = load i64, ptr %a, align 8
   %shr5 = lshr i64 %tmp4, 4
-  store i64 %shr5, i64* %a, align 8
-  %tmp5 = load i64, i64* %arrayidx1, align 8
+  store i64 %shr5, ptr %a, align 8
+  %tmp5 = load i64, ptr %arrayidx1, align 8
   %shr7 = lshr i64 %tmp5, 4
-  store i64 %shr7, i64* %arrayidx1, align 8
-  %tmp6 = load i64, i64* %arrayidx3, align 8
-  %tmp7 = load i64, i64* @total, align 8
+  store i64 %shr7, ptr %arrayidx1, align 8
+  %tmp6 = load i64, ptr %arrayidx3, align 8
+  %tmp7 = load i64, ptr @total, align 8
   %add9 = add i64 %tmp7, %tmp6
-  store i64 %add9, i64* @total, align 8
+  store i64 %add9, ptr @total, align 8
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll
index b7de3a39e80f8..ea6989b8bbabb 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll
@@ -5,21 +5,18 @@ target triple = "x86_64-pc-windows-msvc18.0.0"
 
 %struct.B = type { i64, i64 }
 
-define void @test1(%struct.B* %p) personality i32 (...)* @__CxxFrameHandler3 {
+define void @test1(ptr %p) personality ptr @__CxxFrameHandler3 {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  invoke.cont:
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], %struct.B* [[P:%.*]], i64 0, i32 0
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[GEP1]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[P:%.*]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[GEP1]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP1]], <2 x i64>* [[TMP3]], align 8
+; CHECK-NEXT:    store <2 x i64> [[TMP1]], ptr [[P]], align 8
 ; CHECK-NEXT:    invoke void @throw()
 ; CHECK-NEXT:    to label [[UNREACHABLE:%.*]] unwind label [[CATCH_DISPATCH:%.*]]
 ; CHECK:       catch.dispatch:
 ; CHECK-NEXT:    [[CS:%.*]] = catchswitch within none [label %invoke.cont1] unwind label [[EHCLEANUP:%.*]]
 ; CHECK:       invoke.cont1:
-; CHECK-NEXT:    [[CATCH:%.*]] = catchpad within [[CS]] [i8* null, i32 64, i8* null]
+; CHECK-NEXT:    [[CATCH:%.*]] = catchpad within [[CS]] [ptr null, i32 64, ptr null]
 ; CHECK-NEXT:    invoke void @throw() [ "funclet"(token [[CATCH]]) ]
 ; CHECK-NEXT:    to label [[UNREACHABLE]] unwind label [[EHCLEANUP]]
 ; CHECK:       ehcleanup:
@@ -31,12 +28,11 @@ define void @test1(%struct.B* %p) personality i32 (...)* @__CxxFrameHandler3 {
 ; CHECK-NEXT:    unreachable
 ;
 invoke.cont:
-  %gep1 = getelementptr inbounds %struct.B, %struct.B* %p, i64 0, i32 0
-  %gep2 = getelementptr inbounds %struct.B, %struct.B* %p, i64 0, i32 1
-  %load1 = load i64, i64* %gep1, align 8
-  %load2 = load i64, i64* %gep2, align 8
-  store i64 %load1, i64* %gep1, align 8
-  store i64 %load2, i64* %gep2, align 8
+  %gep2 = getelementptr inbounds %struct.B, ptr %p, i64 0, i32 1
+  %load1 = load i64, ptr %p, align 8
+  %load2 = load i64, ptr %gep2, align 8
+  store i64 %load1, ptr %p, align 8
+  store i64 %load2, ptr %gep2, align 8
   invoke void @throw()
   to label %unreachable unwind label %catch.dispatch
 
@@ -44,7 +40,7 @@ catch.dispatch:                                   ; preds = %invoke.cont
   %cs = catchswitch within none [label %invoke.cont1] unwind label %ehcleanup
 
 invoke.cont1:                                     ; preds = %catch.dispatch
-  %catch = catchpad within %cs [i8* null, i32 64, i8* null]
+  %catch = catchpad within %cs [ptr null, i32 64, ptr null]
   invoke void @throw() [ "funclet"(token %catch) ]
   to label %unreachable unwind label %ehcleanup
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll
index 79649cfcacf72..dd51105e1a6ca 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-- -mcpu=corei7 < %s | FileCheck %s
 
-define void @test1(float %a, float %b, float %c, float %d, i32* nocapture %p) {
+define void @test1(float %a, float %b, float %c, float %d, ptr nocapture %p) {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
@@ -9,8 +9,7 @@ define void @test1(float %a, float %b, float %c, float %d, i32* nocapture %p) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[C:%.*]], i64 2
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[D:%.*]], i64 3
 ; CHECK-NEXT:    [[TMP4:%.*]] = fptosi <4 x float> [[TMP3]] to <4 x i32>
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[P:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]]
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -18,17 +17,17 @@ entry:
   %conv1 = fptosi float %b to i32
   %conv3 = fptosi float %c to i32
   %conv5 = fptosi float %d to i32
-  %incdec.ptr = getelementptr inbounds i32, i32* %p, i64 1
-  store i32 %conv, i32* %p, align 4, !tbaa !2
-  %incdec.ptr8 = getelementptr inbounds i32, i32* %p, i64 2
-  store i32 %conv1, i32* %incdec.ptr, align 4, !tbaa !2
-  %incdec.ptr10 = getelementptr inbounds i32, i32* %p, i64 3
-  store i32 %conv3, i32* %incdec.ptr8, align 4, !tbaa !2
-  store i32 %conv5, i32* %incdec.ptr10, align 4, !tbaa !2
+  %incdec.ptr = getelementptr inbounds i32, ptr %p, i64 1
+  store i32 %conv, ptr %p, align 4, !tbaa !2
+  %incdec.ptr8 = getelementptr inbounds i32, ptr %p, i64 2
+  store i32 %conv1, ptr %incdec.ptr, align 4, !tbaa !2
+  %incdec.ptr10 = getelementptr inbounds i32, ptr %p, i64 3
+  store i32 %conv3, ptr %incdec.ptr8, align 4, !tbaa !2
+  store i32 %conv5, ptr %incdec.ptr10, align 4, !tbaa !2
   ret void
 }
 
-define void @test1_vec(float %a, float %b, float %c, float %d, <4 x i32>* nocapture %p) {
+define void @test1_vec(float %a, float %b, float %c, float %d, ptr nocapture %p) {
 ; CHECK-LABEL: @test1_vec(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
@@ -36,7 +35,7 @@ define void @test1_vec(float %a, float %b, float %c, float %d, <4 x i32>* nocapt
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[C:%.*]], i64 2
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[D:%.*]], i64 3
 ; CHECK-NEXT:    [[TMP4:%.*]] = fptosi <4 x float> [[TMP3]] to <4 x i32>
-; CHECK-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* [[P:%.*]], align 16, !tbaa [[TBAA0]]
+; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[P:%.*]], align 16, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -48,11 +47,11 @@ entry:
   %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %conv3, i32 2
   %conv5 = fptosi float %d to i32
   %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, i32 3
-  store <4 x i32> %vecinit6, <4 x i32>* %p, align 16, !tbaa !2
+  store <4 x i32> %vecinit6, ptr %p, align 16, !tbaa !2
   ret void
 }
 
-define void @test2(i32 %a, i32 %b, i32 %c, i32 %d, i32* nocapture %p) {
+define void @test2(i32 %a, i32 %b, i32 %c, i32 %d, ptr nocapture %p) {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i64 0
@@ -60,8 +59,7 @@ define void @test2(i32 %a, i32 %b, i32 %c, i32 %d, i32* nocapture %p) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[C:%.*]], i64 2
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[D:%.*]], i64 3
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[P:%.*]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -69,24 +67,24 @@ entry:
   %add1 = add nsw i32 %b, 1
   %add3 = add nsw i32 %c, 1
   %add5 = add nsw i32 %d, 1
-  %incdec.ptr = getelementptr inbounds i32, i32* %p, i64 1
-  store i32 %add, i32* %p, align 4, !tbaa !2
-  %incdec.ptr8 = getelementptr inbounds i32, i32* %p, i64 2
-  store i32 %add1, i32* %incdec.ptr, align 4, !tbaa !2
-  %incdec.ptr10 = getelementptr inbounds i32, i32* %p, i64 3
-  store i32 %add3, i32* %incdec.ptr8, align 4, !tbaa !2
-  store i32 %add5, i32* %incdec.ptr10, align 4, !tbaa !2
+  %incdec.ptr = getelementptr inbounds i32, ptr %p, i64 1
+  store i32 %add, ptr %p, align 4, !tbaa !2
+  %incdec.ptr8 = getelementptr inbounds i32, ptr %p, i64 2
+  store i32 %add1, ptr %incdec.ptr, align 4, !tbaa !2
+  %incdec.ptr10 = getelementptr inbounds i32, ptr %p, i64 3
+  store i32 %add3, ptr %incdec.ptr8, align 4, !tbaa !2
+  store i32 %add5, ptr %incdec.ptr10, align 4, !tbaa !2
   ret void
 }
 
-define void @test2_vec(i32 %0, i32 %1, i32 %2, i32 %3, <4 x i32>* nocapture %4) {
+define void @test2_vec(i32 %0, i32 %1, i32 %2, i32 %3, ptr nocapture %4) {
 ; CHECK-LABEL: @test2_vec(
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP1:%.*]], i64 1
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP2:%.*]], i64 2
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP3:%.*]], i64 3
 ; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <4 x i32> [[TMP9]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    store <4 x i32> [[TMP10]], <4 x i32>* [[TMP4:%.*]], align 16, !tbaa [[TBAA0]]
+; CHECK-NEXT:    store <4 x i32> [[TMP10]], ptr [[TMP4:%.*]], align 16, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    ret void
 ;
   %6 = add nsw i32 %0, 1
@@ -97,7 +95,7 @@ define void @test2_vec(i32 %0, i32 %1, i32 %2, i32 %3, <4 x i32>* nocapture %4)
   %11 = insertelement <4 x i32> %9, i32 %10, i32 2
   %12 = add nsw i32 %3, 1
   %13 = insertelement <4 x i32> %11, i32 %12, i32 3
-  store <4 x i32> %13, <4 x i32>* %4, align 16, !tbaa !2
+  store <4 x i32> %13, ptr %4, align 16, !tbaa !2
   ret void
 }
 

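The hunks above all apply the same mechanical rewrite: with typed pointers, a vectorized access first had to bitcast the scalar pointer to a vector pointer, while with opaque pointers the access uses the incoming ptr directly, so the bitcast disappears. A minimal before/after sketch of that pattern (hypothetical %p and %v, not taken from any one test):

  ; typed pointers (before): the vector store needs an explicit bitcast
  %vp = bitcast i32* %p to <4 x i32>*
  store <4 x i32> %v, <4 x i32>* %vp, align 4

  ; opaque pointers (after): the store uses the incoming ptr directly
  store <4 x i32> %v, ptr %p, align 4
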
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll
index cbeb0c21344d4..62c59be3fce02 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll
@@ -4,11 +4,9 @@
 ; See https://reviews.llvm.org/D70068 and https://reviews.llvm.org/D70587 for context
 
 ; Checks that vector insertvalues into the struct become SLP seeds.
-define { <2 x float>, <2 x float> } @StructOfVectors(float *%Ptr) {
+define { <2 x float>, <2 x float> } @StructOfVectors(ptr %Ptr) {
 ; CHECK-LABEL: @StructOfVectors(
-; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds float, float* [[PTR:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], <float 1.100000e+01, float 1.200000e+01, float 1.300000e+01, float 1.400000e+01>
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
@@ -16,14 +14,13 @@ define { <2 x float>, <2 x float> } @StructOfVectors(float *%Ptr) {
 ; CHECK-NEXT:    [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[TMP5]], 1
 ; CHECK-NEXT:    ret { <2 x float>, <2 x float> } [[RET1]]
 ;
-  %GEP0 = getelementptr inbounds float, float* %Ptr, i64 0
-  %L0 = load float, float * %GEP0
-  %GEP1 = getelementptr inbounds float, float* %Ptr, i64 1
-  %L1 = load float, float * %GEP1
-  %GEP2 = getelementptr inbounds float, float* %Ptr, i64 2
-  %L2 = load float, float * %GEP2
-  %GEP3 = getelementptr inbounds float, float* %Ptr, i64 3
-  %L3 = load float, float * %GEP3
+  %L0 = load float, ptr %Ptr
+  %GEP1 = getelementptr inbounds float, ptr %Ptr, i64 1
+  %L1 = load float, ptr %GEP1
+  %GEP2 = getelementptr inbounds float, ptr %Ptr, i64 2
+  %L2 = load float, ptr %GEP2
+  %GEP3 = getelementptr inbounds float, ptr %Ptr, i64 3
+  %L3 = load float, ptr %GEP3
 
   %Fadd0 = fadd fast float %L0, 1.1e+01
   %Fadd1 = fadd fast float %L1, 1.2e+01
@@ -43,11 +40,9 @@ define { <2 x float>, <2 x float> } @StructOfVectors(float *%Ptr) {
 
 %StructTy = type { float, float}
 
-define [2 x %StructTy] @ArrayOfStruct(float *%Ptr) {
+define [2 x %StructTy] @ArrayOfStruct(ptr %Ptr) {
 ; CHECK-LABEL: @ArrayOfStruct(
-; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds float, float* [[PTR:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], <float 1.100000e+01, float 1.200000e+01, float 1.300000e+01, float 1.400000e+01>
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
 ; CHECK-NEXT:    [[STRUCTIN0:%.*]] = insertvalue [[STRUCTTY:%.*]] undef, float [[TMP4]], 0
@@ -61,14 +56,13 @@ define [2 x %StructTy] @ArrayOfStruct(float *%Ptr) {
 ; CHECK-NEXT:    [[RET1:%.*]] = insertvalue [2 x %StructTy] [[RET0]], [[STRUCTTY]] [[STRUCTIN3]], 1
 ; CHECK-NEXT:    ret [2 x %StructTy] [[RET1]]
 ;
-  %GEP0 = getelementptr inbounds float, float* %Ptr, i64 0
-  %L0 = load float, float * %GEP0
-  %GEP1 = getelementptr inbounds float, float* %Ptr, i64 1
-  %L1 = load float, float * %GEP1
-  %GEP2 = getelementptr inbounds float, float* %Ptr, i64 2
-  %L2 = load float, float * %GEP2
-  %GEP3 = getelementptr inbounds float, float* %Ptr, i64 3
-  %L3 = load float, float * %GEP3
+  %L0 = load float, ptr %Ptr
+  %GEP1 = getelementptr inbounds float, ptr %Ptr, i64 1
+  %L1 = load float, ptr %GEP1
+  %GEP2 = getelementptr inbounds float, ptr %Ptr, i64 2
+  %L2 = load float, ptr %GEP2
+  %GEP3 = getelementptr inbounds float, ptr %Ptr, i64 3
+  %L3 = load float, ptr %GEP3
 
   %Fadd0 = fadd fast float %L0, 1.1e+01
   %Fadd1 = fadd fast float %L1, 1.2e+01
@@ -86,11 +80,9 @@ define [2 x %StructTy] @ArrayOfStruct(float *%Ptr) {
   ret [2 x %StructTy] %Ret1
 }
 
-define {%StructTy, %StructTy} @StructOfStruct(float *%Ptr) {
+define {%StructTy, %StructTy} @StructOfStruct(ptr %Ptr) {
 ; CHECK-LABEL: @StructOfStruct(
-; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds float, float* [[PTR:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], <float 1.100000e+01, float 1.200000e+01, float 1.300000e+01, float 1.400000e+01>
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
 ; CHECK-NEXT:    [[STRUCTIN0:%.*]] = insertvalue [[STRUCTTY:%.*]] undef, float [[TMP4]], 0
@@ -104,14 +96,13 @@ define {%StructTy, %StructTy} @StructOfStruct(float *%Ptr) {
 ; CHECK-NEXT:    [[RET1:%.*]] = insertvalue { [[STRUCTTY]], [[STRUCTTY]] } [[RET0]], [[STRUCTTY]] [[STRUCTIN3]], 1
 ; CHECK-NEXT:    ret { [[STRUCTTY]], [[STRUCTTY]] } [[RET1]]
 ;
-  %GEP0 = getelementptr inbounds float, float* %Ptr, i64 0
-  %L0 = load float, float * %GEP0
-  %GEP1 = getelementptr inbounds float, float* %Ptr, i64 1
-  %L1 = load float, float * %GEP1
-  %GEP2 = getelementptr inbounds float, float* %Ptr, i64 2
-  %L2 = load float, float * %GEP2
-  %GEP3 = getelementptr inbounds float, float* %Ptr, i64 3
-  %L3 = load float, float * %GEP3
+  %L0 = load float, ptr %Ptr
+  %GEP1 = getelementptr inbounds float, ptr %Ptr, i64 1
+  %L1 = load float, ptr %GEP1
+  %GEP2 = getelementptr inbounds float, ptr %Ptr, i64 2
+  %L2 = load float, ptr %GEP2
+  %GEP3 = getelementptr inbounds float, ptr %Ptr, i64 3
+  %L3 = load float, ptr %GEP3
 
   %Fadd0 = fadd fast float %L0, 1.1e+01
   %Fadd1 = fadd fast float %L1, 1.2e+01
@@ -129,16 +120,15 @@ define {%StructTy, %StructTy} @StructOfStruct(float *%Ptr) {
   ret {%StructTy, %StructTy} %Ret1
 }
 
-define {%StructTy, float, float} @NonHomogeneousStruct(float *%Ptr) {
+define {%StructTy, float, float} @NonHomogeneousStruct(ptr %Ptr) {
 ; CHECK-LABEL: @NonHomogeneousStruct(
-; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds float, float* [[PTR:%.*]], i64 0
-; CHECK-NEXT:    [[L0:%.*]] = load float, float* [[GEP0]], align 4
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 1
-; CHECK-NEXT:    [[L1:%.*]] = load float, float* [[GEP1]], align 4
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 2
-; CHECK-NEXT:    [[L2:%.*]] = load float, float* [[GEP2]], align 4
-; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 3
-; CHECK-NEXT:    [[L3:%.*]] = load float, float* [[GEP3]], align 4
+; CHECK-NEXT:    [[L0:%.*]] = load float, ptr [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds float, ptr [[PTR]], i64 1
+; CHECK-NEXT:    [[L1:%.*]] = load float, ptr [[GEP1]], align 4
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds float, ptr [[PTR]], i64 2
+; CHECK-NEXT:    [[L2:%.*]] = load float, ptr [[GEP2]], align 4
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds float, ptr [[PTR]], i64 3
+; CHECK-NEXT:    [[L3:%.*]] = load float, ptr [[GEP3]], align 4
 ; CHECK-NEXT:    [[FADD0:%.*]] = fadd fast float [[L0]], 1.100000e+01
 ; CHECK-NEXT:    [[FADD1:%.*]] = fadd fast float [[L1]], 1.200000e+01
 ; CHECK-NEXT:    [[FADD2:%.*]] = fadd fast float [[L2]], 1.300000e+01
@@ -150,14 +140,13 @@ define {%StructTy, float, float} @NonHomogeneousStruct(float *%Ptr) {
 ; CHECK-NEXT:    [[RET2:%.*]] = insertvalue { [[STRUCTTY]], float, float } [[RET1]], float [[FADD3]], 2
 ; CHECK-NEXT:    ret { [[STRUCTTY]], float, float } [[RET2]]
 ;
-  %GEP0 = getelementptr inbounds float, float* %Ptr, i64 0
-  %L0 = load float, float * %GEP0
-  %GEP1 = getelementptr inbounds float, float* %Ptr, i64 1
-  %L1 = load float, float * %GEP1
-  %GEP2 = getelementptr inbounds float, float* %Ptr, i64 2
-  %L2 = load float, float * %GEP2
-  %GEP3 = getelementptr inbounds float, float* %Ptr, i64 3
-  %L3 = load float, float * %GEP3
+  %L0 = load float, ptr %Ptr
+  %GEP1 = getelementptr inbounds float, ptr %Ptr, i64 1
+  %L1 = load float, ptr %GEP1
+  %GEP2 = getelementptr inbounds float, ptr %Ptr, i64 2
+  %L2 = load float, ptr %GEP2
+  %GEP3 = getelementptr inbounds float, ptr %Ptr, i64 3
+  %L3 = load float, ptr %GEP3
 
   %Fadd0 = fadd fast float %L0, 1.1e+01
   %Fadd1 = fadd fast float %L1, 1.2e+01
@@ -176,11 +165,9 @@ define {%StructTy, float, float} @NonHomogeneousStruct(float *%Ptr) {
 %Struct1Ty = type { i16, i16 }
 %Struct2Ty = type { %Struct1Ty, %Struct1Ty}
 
-define {%Struct2Ty, %Struct2Ty} @StructOfStructOfStruct(i16 *%Ptr) {
+define {%Struct2Ty, %Struct2Ty} @StructOfStructOfStruct(ptr %Ptr) {
 ; CHECK-LABEL: @StructOfStructOfStruct(
-; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[GEP0]] to <8 x i16>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
 ; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i16> [[TMP2]], <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
 ; CHECK-NEXT:    [[STRUCTIN0:%.*]] = insertvalue [[STRUCT1TY:%.*]] undef, i16 [[TMP4]], 0
@@ -206,22 +193,21 @@ define {%Struct2Ty, %Struct2Ty} @StructOfStructOfStruct(i16 *%Ptr) {
 ; CHECK-NEXT:    [[RET1:%.*]] = insertvalue { [[STRUCT2TY]], [[STRUCT2TY]] } [[RET0]], [[STRUCT2TY]] [[STRUCT2IN3]], 1
 ; CHECK-NEXT:    ret { [[STRUCT2TY]], [[STRUCT2TY]] } [[RET1]]
 ;
-  %GEP0 = getelementptr inbounds i16, i16* %Ptr, i64 0
-  %L0 = load i16, i16 * %GEP0
-  %GEP1 = getelementptr inbounds i16, i16* %Ptr, i64 1
-  %L1 = load i16, i16 * %GEP1
-  %GEP2 = getelementptr inbounds i16, i16* %Ptr, i64 2
-  %L2 = load i16, i16 * %GEP2
-  %GEP3 = getelementptr inbounds i16, i16* %Ptr, i64 3
-  %L3 = load i16, i16 * %GEP3
-  %GEP4 = getelementptr inbounds i16, i16* %Ptr, i64 4
-  %L4 = load i16, i16 * %GEP4
-  %GEP5 = getelementptr inbounds i16, i16* %Ptr, i64 5
-  %L5 = load i16, i16 * %GEP5
-  %GEP6 = getelementptr inbounds i16, i16* %Ptr, i64 6
-  %L6 = load i16, i16 * %GEP6
-  %GEP7 = getelementptr inbounds i16, i16* %Ptr, i64 7
-  %L7 = load i16, i16 * %GEP7
+  %L0 = load i16, ptr %Ptr
+  %GEP1 = getelementptr inbounds i16, ptr %Ptr, i64 1
+  %L1 = load i16, ptr %GEP1
+  %GEP2 = getelementptr inbounds i16, ptr %Ptr, i64 2
+  %L2 = load i16, ptr %GEP2
+  %GEP3 = getelementptr inbounds i16, ptr %Ptr, i64 3
+  %L3 = load i16, ptr %GEP3
+  %GEP4 = getelementptr inbounds i16, ptr %Ptr, i64 4
+  %L4 = load i16, ptr %GEP4
+  %GEP5 = getelementptr inbounds i16, ptr %Ptr, i64 5
+  %L5 = load i16, ptr %GEP5
+  %GEP6 = getelementptr inbounds i16, ptr %Ptr, i64 6
+  %L6 = load i16, ptr %GEP6
+  %GEP7 = getelementptr inbounds i16, ptr %Ptr, i64 7
+  %L7 = load i16, ptr %GEP7
 
   %Fadd0 = add i16 %L0, 1
   %Fadd1 = add i16 %L1, 2

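The other simplification recurring in pr42022-inseltpoison.ll above (and in the pr42022.ll copy below) is that a getelementptr with a zero index only re-derives the base pointer, so the converted tests drop the %GEP0 instruction and load straight from %Ptr. A short sketch using the same names these tests use:

  ; before: the index-0 GEP is a no-op pointer re-derivation
  %GEP0 = getelementptr inbounds float, float* %Ptr, i64 0
  %L0 = load float, float* %GEP0

  ; after: the load folds onto the base pointer itself
  %L0 = load float, ptr %Ptr
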
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll
index 7d7283e5c5904..ad6bad31bf4f0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll
@@ -4,11 +4,9 @@
 ; See https://reviews.llvm.org/D70068 and https://reviews.llvm.org/D70587 for context
 
 ; Checks that vector insertvalues into the struct become SLP seeds.
-define { <2 x float>, <2 x float> } @StructOfVectors(float *%Ptr) {
+define { <2 x float>, <2 x float> } @StructOfVectors(ptr %Ptr) {
 ; CHECK-LABEL: @StructOfVectors(
-; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds float, float* [[PTR:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], <float 1.100000e+01, float 1.200000e+01, float 1.300000e+01, float 1.400000e+01>
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
@@ -16,14 +14,13 @@ define { <2 x float>, <2 x float> } @StructOfVectors(float *%Ptr) {
 ; CHECK-NEXT:    [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[TMP5]], 1
 ; CHECK-NEXT:    ret { <2 x float>, <2 x float> } [[RET1]]
 ;
-  %GEP0 = getelementptr inbounds float, float* %Ptr, i64 0
-  %L0 = load float, float * %GEP0
-  %GEP1 = getelementptr inbounds float, float* %Ptr, i64 1
-  %L1 = load float, float * %GEP1
-  %GEP2 = getelementptr inbounds float, float* %Ptr, i64 2
-  %L2 = load float, float * %GEP2
-  %GEP3 = getelementptr inbounds float, float* %Ptr, i64 3
-  %L3 = load float, float * %GEP3
+  %L0 = load float, ptr %Ptr
+  %GEP1 = getelementptr inbounds float, ptr %Ptr, i64 1
+  %L1 = load float, ptr %GEP1
+  %GEP2 = getelementptr inbounds float, ptr %Ptr, i64 2
+  %L2 = load float, ptr %GEP2
+  %GEP3 = getelementptr inbounds float, ptr %Ptr, i64 3
+  %L3 = load float, ptr %GEP3
 
   %Fadd0 = fadd fast float %L0, 1.1e+01
   %Fadd1 = fadd fast float %L1, 1.2e+01
@@ -43,11 +40,9 @@ define { <2 x float>, <2 x float> } @StructOfVectors(float *%Ptr) {
 
 %StructTy = type { float, float}
 
-define [2 x %StructTy] @ArrayOfStruct(float *%Ptr) {
+define [2 x %StructTy] @ArrayOfStruct(ptr %Ptr) {
 ; CHECK-LABEL: @ArrayOfStruct(
-; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds float, float* [[PTR:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], <float 1.100000e+01, float 1.200000e+01, float 1.300000e+01, float 1.400000e+01>
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
 ; CHECK-NEXT:    [[STRUCTIN0:%.*]] = insertvalue [[STRUCTTY:%.*]] undef, float [[TMP4]], 0
@@ -61,14 +56,13 @@ define [2 x %StructTy] @ArrayOfStruct(float *%Ptr) {
 ; CHECK-NEXT:    [[RET1:%.*]] = insertvalue [2 x %StructTy] [[RET0]], [[STRUCTTY]] [[STRUCTIN3]], 1
 ; CHECK-NEXT:    ret [2 x %StructTy] [[RET1]]
 ;
-  %GEP0 = getelementptr inbounds float, float* %Ptr, i64 0
-  %L0 = load float, float * %GEP0
-  %GEP1 = getelementptr inbounds float, float* %Ptr, i64 1
-  %L1 = load float, float * %GEP1
-  %GEP2 = getelementptr inbounds float, float* %Ptr, i64 2
-  %L2 = load float, float * %GEP2
-  %GEP3 = getelementptr inbounds float, float* %Ptr, i64 3
-  %L3 = load float, float * %GEP3
+  %L0 = load float, ptr %Ptr
+  %GEP1 = getelementptr inbounds float, ptr %Ptr, i64 1
+  %L1 = load float, ptr %GEP1
+  %GEP2 = getelementptr inbounds float, ptr %Ptr, i64 2
+  %L2 = load float, ptr %GEP2
+  %GEP3 = getelementptr inbounds float, ptr %Ptr, i64 3
+  %L3 = load float, ptr %GEP3
 
   %Fadd0 = fadd fast float %L0, 1.1e+01
   %Fadd1 = fadd fast float %L1, 1.2e+01
@@ -86,11 +80,9 @@ define [2 x %StructTy] @ArrayOfStruct(float *%Ptr) {
   ret [2 x %StructTy] %Ret1
 }
 
-define {%StructTy, %StructTy} @StructOfStruct(float *%Ptr) {
+define {%StructTy, %StructTy} @StructOfStruct(ptr %Ptr) {
 ; CHECK-LABEL: @StructOfStruct(
-; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds float, float* [[PTR:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], <float 1.100000e+01, float 1.200000e+01, float 1.300000e+01, float 1.400000e+01>
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
 ; CHECK-NEXT:    [[STRUCTIN0:%.*]] = insertvalue [[STRUCTTY:%.*]] undef, float [[TMP4]], 0
@@ -104,14 +96,13 @@ define {%StructTy, %StructTy} @StructOfStruct(float *%Ptr) {
 ; CHECK-NEXT:    [[RET1:%.*]] = insertvalue { [[STRUCTTY]], [[STRUCTTY]] } [[RET0]], [[STRUCTTY]] [[STRUCTIN3]], 1
 ; CHECK-NEXT:    ret { [[STRUCTTY]], [[STRUCTTY]] } [[RET1]]
 ;
-  %GEP0 = getelementptr inbounds float, float* %Ptr, i64 0
-  %L0 = load float, float * %GEP0
-  %GEP1 = getelementptr inbounds float, float* %Ptr, i64 1
-  %L1 = load float, float * %GEP1
-  %GEP2 = getelementptr inbounds float, float* %Ptr, i64 2
-  %L2 = load float, float * %GEP2
-  %GEP3 = getelementptr inbounds float, float* %Ptr, i64 3
-  %L3 = load float, float * %GEP3
+  %L0 = load float, ptr %Ptr
+  %GEP1 = getelementptr inbounds float, ptr %Ptr, i64 1
+  %L1 = load float, ptr %GEP1
+  %GEP2 = getelementptr inbounds float, ptr %Ptr, i64 2
+  %L2 = load float, ptr %GEP2
+  %GEP3 = getelementptr inbounds float, ptr %Ptr, i64 3
+  %L3 = load float, ptr %GEP3
 
   %Fadd0 = fadd fast float %L0, 1.1e+01
   %Fadd1 = fadd fast float %L1, 1.2e+01
@@ -129,16 +120,15 @@ define {%StructTy, %StructTy} @StructOfStruct(float *%Ptr) {
   ret {%StructTy, %StructTy} %Ret1
 }
 
-define {%StructTy, float, float} @NonHomogeneousStruct(float *%Ptr) {
+define {%StructTy, float, float} @NonHomogeneousStruct(ptr %Ptr) {
 ; CHECK-LABEL: @NonHomogeneousStruct(
-; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds float, float* [[PTR:%.*]], i64 0
-; CHECK-NEXT:    [[L0:%.*]] = load float, float* [[GEP0]], align 4
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 1
-; CHECK-NEXT:    [[L1:%.*]] = load float, float* [[GEP1]], align 4
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 2
-; CHECK-NEXT:    [[L2:%.*]] = load float, float* [[GEP2]], align 4
-; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 3
-; CHECK-NEXT:    [[L3:%.*]] = load float, float* [[GEP3]], align 4
+; CHECK-NEXT:    [[L0:%.*]] = load float, ptr [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds float, ptr [[PTR]], i64 1
+; CHECK-NEXT:    [[L1:%.*]] = load float, ptr [[GEP1]], align 4
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds float, ptr [[PTR]], i64 2
+; CHECK-NEXT:    [[L2:%.*]] = load float, ptr [[GEP2]], align 4
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds float, ptr [[PTR]], i64 3
+; CHECK-NEXT:    [[L3:%.*]] = load float, ptr [[GEP3]], align 4
 ; CHECK-NEXT:    [[FADD0:%.*]] = fadd fast float [[L0]], 1.100000e+01
 ; CHECK-NEXT:    [[FADD1:%.*]] = fadd fast float [[L1]], 1.200000e+01
 ; CHECK-NEXT:    [[FADD2:%.*]] = fadd fast float [[L2]], 1.300000e+01
@@ -150,14 +140,13 @@ define {%StructTy, float, float} @NonHomogeneousStruct(float *%Ptr) {
 ; CHECK-NEXT:    [[RET2:%.*]] = insertvalue { [[STRUCTTY]], float, float } [[RET1]], float [[FADD3]], 2
 ; CHECK-NEXT:    ret { [[STRUCTTY]], float, float } [[RET2]]
 ;
-  %GEP0 = getelementptr inbounds float, float* %Ptr, i64 0
-  %L0 = load float, float * %GEP0
-  %GEP1 = getelementptr inbounds float, float* %Ptr, i64 1
-  %L1 = load float, float * %GEP1
-  %GEP2 = getelementptr inbounds float, float* %Ptr, i64 2
-  %L2 = load float, float * %GEP2
-  %GEP3 = getelementptr inbounds float, float* %Ptr, i64 3
-  %L3 = load float, float * %GEP3
+  %L0 = load float, ptr %Ptr
+  %GEP1 = getelementptr inbounds float, ptr %Ptr, i64 1
+  %L1 = load float, ptr %GEP1
+  %GEP2 = getelementptr inbounds float, ptr %Ptr, i64 2
+  %L2 = load float, ptr %GEP2
+  %GEP3 = getelementptr inbounds float, ptr %Ptr, i64 3
+  %L3 = load float, ptr %GEP3
 
   %Fadd0 = fadd fast float %L0, 1.1e+01
   %Fadd1 = fadd fast float %L1, 1.2e+01
@@ -176,11 +165,9 @@ define {%StructTy, float, float} @NonHomogeneousStruct(float *%Ptr) {
 %Struct1Ty = type { i16, i16 }
 %Struct2Ty = type { %Struct1Ty, %Struct1Ty}
 
-define {%Struct2Ty, %Struct2Ty} @StructOfStructOfStruct(i16 *%Ptr) {
+define {%Struct2Ty, %Struct2Ty} @StructOfStructOfStruct(ptr %Ptr) {
 ; CHECK-LABEL: @StructOfStructOfStruct(
-; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[GEP0]] to <8 x i16>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
 ; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i16> [[TMP2]], <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
 ; CHECK-NEXT:    [[STRUCTIN0:%.*]] = insertvalue [[STRUCT1TY:%.*]] undef, i16 [[TMP4]], 0
@@ -206,22 +193,21 @@ define {%Struct2Ty, %Struct2Ty} @StructOfStructOfStruct(i16 *%Ptr) {
 ; CHECK-NEXT:    [[RET1:%.*]] = insertvalue { [[STRUCT2TY]], [[STRUCT2TY]] } [[RET0]], [[STRUCT2TY]] [[STRUCT2IN3]], 1
 ; CHECK-NEXT:    ret { [[STRUCT2TY]], [[STRUCT2TY]] } [[RET1]]
 ;
-  %GEP0 = getelementptr inbounds i16, i16* %Ptr, i64 0
-  %L0 = load i16, i16 * %GEP0
-  %GEP1 = getelementptr inbounds i16, i16* %Ptr, i64 1
-  %L1 = load i16, i16 * %GEP1
-  %GEP2 = getelementptr inbounds i16, i16* %Ptr, i64 2
-  %L2 = load i16, i16 * %GEP2
-  %GEP3 = getelementptr inbounds i16, i16* %Ptr, i64 3
-  %L3 = load i16, i16 * %GEP3
-  %GEP4 = getelementptr inbounds i16, i16* %Ptr, i64 4
-  %L4 = load i16, i16 * %GEP4
-  %GEP5 = getelementptr inbounds i16, i16* %Ptr, i64 5
-  %L5 = load i16, i16 * %GEP5
-  %GEP6 = getelementptr inbounds i16, i16* %Ptr, i64 6
-  %L6 = load i16, i16 * %GEP6
-  %GEP7 = getelementptr inbounds i16, i16* %Ptr, i64 7
-  %L7 = load i16, i16 * %GEP7
+  %L0 = load i16, ptr %Ptr
+  %GEP1 = getelementptr inbounds i16, ptr %Ptr, i64 1
+  %L1 = load i16, ptr %GEP1
+  %GEP2 = getelementptr inbounds i16, ptr %Ptr, i64 2
+  %L2 = load i16, ptr %GEP2
+  %GEP3 = getelementptr inbounds i16, ptr %Ptr, i64 3
+  %L3 = load i16, ptr %GEP3
+  %GEP4 = getelementptr inbounds i16, ptr %Ptr, i64 4
+  %L4 = load i16, ptr %GEP4
+  %GEP5 = getelementptr inbounds i16, ptr %Ptr, i64 5
+  %L5 = load i16, ptr %GEP5
+  %GEP6 = getelementptr inbounds i16, ptr %Ptr, i64 6
+  %L6 = load i16, ptr %GEP6
+  %GEP7 = getelementptr inbounds i16, ptr %Ptr, i64 7
+  %L7 = load i16, ptr %GEP7
 
   %Fadd0 = add i16 %L0, 1
   %Fadd1 = add i16 %L1, 2

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr44067-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr44067-inseltpoison.ll
index 646e23d73b925..264a41daef55b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr44067-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr44067-inseltpoison.ll
@@ -3,19 +3,17 @@
 
 ; See https://reviews.llvm.org/D83779
 
-define <2 x float> @foo({{float, float}}* %A) {
+define <2 x float> @foo(ptr %A) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast { { float, float } }* [[A:%.*]] to <2 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], <float 2.000000e+00, float 2.000000e+00>
 ; CHECK-NEXT:    ret <2 x float> [[TMP2]]
 ;
 entry:
-  %0 = bitcast {{float, float}}* %A to <2 x float>*
-  %1 = load <2 x float>, <2 x float>* %0
-  %L0 = extractelement <2 x float> %1, i32 0
-  %L1 = extractelement <2 x float> %1, i32 1
+  %0 = load <2 x float>, ptr %A
+  %L0 = extractelement <2 x float> %0, i32 0
+  %L1 = extractelement <2 x float> %0, i32 1
   %Mul0 = fmul float %L0, 2.000000e+00
   %Mul1 = fmul float %L1, 2.000000e+00
   %Ins1 = insertelement <2 x float> poison, float %Mul1, i32 1
@@ -27,11 +25,9 @@ entry:
 %Struct1Ty = type { i16, i16 }
 %Struct2Ty = type { %Struct1Ty, %Struct1Ty}
 
-define {%Struct2Ty, %Struct2Ty} @StructOfStructOfStruct(i16 *%Ptr) {
+define {%Struct2Ty, %Struct2Ty} @StructOfStructOfStruct(ptr %Ptr) {
 ; CHECK-LABEL: @StructOfStructOfStruct(
-; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[GEP0]] to <8 x i16>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
 ; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i16> [[TMP2]], <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 1
 ; CHECK-NEXT:    [[STRUCTIN0:%.*]] = insertvalue [[STRUCT1TY:%.*]] undef, i16 [[TMP4]], 1
@@ -57,22 +53,21 @@ define {%Struct2Ty, %Struct2Ty} @StructOfStructOfStruct(i16 *%Ptr) {
 ; CHECK-NEXT:    [[RET1:%.*]] = insertvalue { [[STRUCT2TY]], [[STRUCT2TY]] } [[RET0]], [[STRUCT2TY]] [[STRUCT2IN1]], 0
 ; CHECK-NEXT:    ret { [[STRUCT2TY]], [[STRUCT2TY]] } [[RET1]]
 ;
-  %GEP0 = getelementptr inbounds i16, i16* %Ptr, i64 0
-  %L0 = load i16, i16 * %GEP0
-  %GEP1 = getelementptr inbounds i16, i16* %Ptr, i64 1
-  %L1 = load i16, i16 * %GEP1
-  %GEP2 = getelementptr inbounds i16, i16* %Ptr, i64 2
-  %L2 = load i16, i16 * %GEP2
-  %GEP3 = getelementptr inbounds i16, i16* %Ptr, i64 3
-  %L3 = load i16, i16 * %GEP3
-  %GEP4 = getelementptr inbounds i16, i16* %Ptr, i64 4
-  %L4 = load i16, i16 * %GEP4
-  %GEP5 = getelementptr inbounds i16, i16* %Ptr, i64 5
-  %L5 = load i16, i16 * %GEP5
-  %GEP6 = getelementptr inbounds i16, i16* %Ptr, i64 6
-  %L6 = load i16, i16 * %GEP6
-  %GEP7 = getelementptr inbounds i16, i16* %Ptr, i64 7
-  %L7 = load i16, i16 * %GEP7
+  %L0 = load i16, ptr %Ptr
+  %GEP1 = getelementptr inbounds i16, ptr %Ptr, i64 1
+  %L1 = load i16, ptr %GEP1
+  %GEP2 = getelementptr inbounds i16, ptr %Ptr, i64 2
+  %L2 = load i16, ptr %GEP2
+  %GEP3 = getelementptr inbounds i16, ptr %Ptr, i64 3
+  %L3 = load i16, ptr %GEP3
+  %GEP4 = getelementptr inbounds i16, ptr %Ptr, i64 4
+  %L4 = load i16, ptr %GEP4
+  %GEP5 = getelementptr inbounds i16, ptr %Ptr, i64 5
+  %L5 = load i16, ptr %GEP5
+  %GEP6 = getelementptr inbounds i16, ptr %Ptr, i64 6
+  %L6 = load i16, ptr %GEP6
+  %GEP7 = getelementptr inbounds i16, ptr %Ptr, i64 7
+  %L7 = load i16, ptr %GEP7
 
   %Fadd0 = add i16 %L0, 1
   %Fadd1 = add i16 %L1, 2

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll
index 33871d8e56ba8..6960e544b59a8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll
@@ -3,19 +3,17 @@
 
 ; See https://reviews.llvm.org/D83779
 
-define <2 x float> @foo({{float, float}}* %A) {
+define <2 x float> @foo(ptr %A) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast { { float, float } }* [[A:%.*]] to <2 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], <float 2.000000e+00, float 2.000000e+00>
 ; CHECK-NEXT:    ret <2 x float> [[TMP2]]
 ;
 entry:
-  %0 = bitcast {{float, float}}* %A to <2 x float>*
-  %1 = load <2 x float>, <2 x float>* %0
-  %L0 = extractelement <2 x float> %1, i32 0
-  %L1 = extractelement <2 x float> %1, i32 1
+  %0 = load <2 x float>, ptr %A
+  %L0 = extractelement <2 x float> %0, i32 0
+  %L1 = extractelement <2 x float> %0, i32 1
   %Mul0 = fmul float %L0, 2.000000e+00
   %Mul1 = fmul float %L1, 2.000000e+00
   %Ins1 = insertelement <2 x float> undef, float %Mul1, i32 1
@@ -27,11 +25,9 @@ entry:
 %Struct1Ty = type { i16, i16 }
 %Struct2Ty = type { %Struct1Ty, %Struct1Ty}
 
-define {%Struct2Ty, %Struct2Ty} @StructOfStructOfStruct(i16 *%Ptr) {
+define {%Struct2Ty, %Struct2Ty} @StructOfStructOfStruct(ptr %Ptr) {
 ; CHECK-LABEL: @StructOfStructOfStruct(
-; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[GEP0]] to <8 x i16>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
 ; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i16> [[TMP2]], <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 1
 ; CHECK-NEXT:    [[STRUCTIN0:%.*]] = insertvalue [[STRUCT1TY:%.*]] undef, i16 [[TMP4]], 1
@@ -57,22 +53,21 @@ define {%Struct2Ty, %Struct2Ty} @StructOfStructOfStruct(i16 *%Ptr) {
 ; CHECK-NEXT:    [[RET1:%.*]] = insertvalue { [[STRUCT2TY]], [[STRUCT2TY]] } [[RET0]], [[STRUCT2TY]] [[STRUCT2IN1]], 0
 ; CHECK-NEXT:    ret { [[STRUCT2TY]], [[STRUCT2TY]] } [[RET1]]
 ;
-  %GEP0 = getelementptr inbounds i16, i16* %Ptr, i64 0
-  %L0 = load i16, i16 * %GEP0
-  %GEP1 = getelementptr inbounds i16, i16* %Ptr, i64 1
-  %L1 = load i16, i16 * %GEP1
-  %GEP2 = getelementptr inbounds i16, i16* %Ptr, i64 2
-  %L2 = load i16, i16 * %GEP2
-  %GEP3 = getelementptr inbounds i16, i16* %Ptr, i64 3
-  %L3 = load i16, i16 * %GEP3
-  %GEP4 = getelementptr inbounds i16, i16* %Ptr, i64 4
-  %L4 = load i16, i16 * %GEP4
-  %GEP5 = getelementptr inbounds i16, i16* %Ptr, i64 5
-  %L5 = load i16, i16 * %GEP5
-  %GEP6 = getelementptr inbounds i16, i16* %Ptr, i64 6
-  %L6 = load i16, i16 * %GEP6
-  %GEP7 = getelementptr inbounds i16, i16* %Ptr, i64 7
-  %L7 = load i16, i16 * %GEP7
+  %L0 = load i16, ptr %Ptr
+  %GEP1 = getelementptr inbounds i16, ptr %Ptr, i64 1
+  %L1 = load i16, ptr %GEP1
+  %GEP2 = getelementptr inbounds i16, ptr %Ptr, i64 2
+  %L2 = load i16, ptr %GEP2
+  %GEP3 = getelementptr inbounds i16, ptr %Ptr, i64 3
+  %L3 = load i16, ptr %GEP3
+  %GEP4 = getelementptr inbounds i16, ptr %Ptr, i64 4
+  %L4 = load i16, ptr %GEP4
+  %GEP5 = getelementptr inbounds i16, ptr %Ptr, i64 5
+  %L5 = load i16, ptr %GEP5
+  %GEP6 = getelementptr inbounds i16, ptr %Ptr, i64 6
+  %L6 = load i16, ptr %GEP6
+  %GEP7 = getelementptr inbounds i16, ptr %Ptr, i64 7
+  %L7 = load i16, ptr %GEP7
 
   %Fadd0 = add i16 %L0, 1
   %Fadd1 = add i16 %L1, 2

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll
index a9e30c0fb8f8c..48d8de2a96186 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll
@@ -5,53 +5,50 @@
 ; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2   | FileCheck %s --check-prefixes=CHECK,AVX
 ; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX
 
-define void @store_i32(i32* nocapture %0, i32 %1, i32 %2) {
+define void @store_i32(ptr nocapture %0, i32 %1, i32 %2) {
 ; CHECK-LABEL: @store_i32(
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1:%.*]], i64 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = mul <4 x i32> [[TMP5]], [[SHUFFLE]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = lshr <4 x i32> [[TMP7]], <i32 15, i32 15, i32 15, i32 15>
 ; CHECK-NEXT:    [[TMP9:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP8]], <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* [[TMP10]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT:    store <4 x i32> [[TMP9]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    ret void
 ;
-  %4 = load i32, i32* %0, align 4, !tbaa !2
+  %4 = load i32, ptr %0, align 4, !tbaa !2
   %5 = mul i32 %4, %1
   %6 = lshr i32 %5, 15
   %7 = icmp ult i32 %6, 255
   %8 = select i1 %7, i32 %6, i32 255
-  store i32 %8, i32* %0, align 4, !tbaa !2
-  %9 = getelementptr inbounds i32, i32* %0, i64 1
-  %10 = load i32, i32* %9, align 4, !tbaa !2
+  store i32 %8, ptr %0, align 4, !tbaa !2
+  %9 = getelementptr inbounds i32, ptr %0, i64 1
+  %10 = load i32, ptr %9, align 4, !tbaa !2
   %11 = mul i32 %10, %1
   %12 = lshr i32 %11, 15
   %13 = icmp ult i32 %12, 255
   %14 = select i1 %13, i32 %12, i32 255
-  store i32 %14, i32* %9, align 4, !tbaa !2
-  %15 = getelementptr inbounds i32, i32* %0, i64 2
-  %16 = load i32, i32* %15, align 4, !tbaa !2
+  store i32 %14, ptr %9, align 4, !tbaa !2
+  %15 = getelementptr inbounds i32, ptr %0, i64 2
+  %16 = load i32, ptr %15, align 4, !tbaa !2
   %17 = mul i32 %16, %1
   %18 = lshr i32 %17, 15
   %19 = icmp ult i32 %18, 255
   %20 = select i1 %19, i32 %18, i32 255
-  store i32 %20, i32* %15, align 4, !tbaa !2
-  %21 = getelementptr inbounds i32, i32* %0, i64 3
-  %22 = load i32, i32* %21, align 4, !tbaa !2
+  store i32 %20, ptr %15, align 4, !tbaa !2
+  %21 = getelementptr inbounds i32, ptr %0, i64 3
+  %22 = load i32, ptr %21, align 4, !tbaa !2
   %23 = mul i32 %22, %1
   %24 = lshr i32 %23, 15
   %25 = icmp ult i32 %24, 255
   %26 = select i1 %25, i32 %24, i32 255
-  store i32 %26, i32* %21, align 4, !tbaa !2
+  store i32 %26, ptr %21, align 4, !tbaa !2
   ret void
 }
 
-define void @store_i8(i8* nocapture %0, i32 %1, i32 %2) {
+define void @store_i8(ptr nocapture %0, i32 %1, i32 %2) {
 ; CHECK-LABEL: @store_i8(
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP0:%.*]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP4]], align 1, !tbaa [[TBAA4:![0-9]+]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i8>, ptr [[TMP0:%.*]], align 1, !tbaa [[TBAA4:![0-9]+]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = zext <4 x i8> [[TMP5]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1:%.*]], i64 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> zeroinitializer
@@ -59,92 +56,90 @@ define void @store_i8(i8* nocapture %0, i32 %1, i32 %2) {
 ; CHECK-NEXT:    [[TMP9:%.*]] = lshr <4 x i32> [[TMP8]], <i32 15, i32 15, i32 15, i32 15>
 ; CHECK-NEXT:    [[TMP10:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP9]], <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
 ; CHECK-NEXT:    [[TMP11:%.*]] = trunc <4 x i32> [[TMP10]] to <4 x i8>
-; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8* [[TMP0]] to <4 x i8>*
-; CHECK-NEXT:    store <4 x i8> [[TMP11]], <4 x i8>* [[TMP12]], align 1, !tbaa [[TBAA4]]
+; CHECK-NEXT:    store <4 x i8> [[TMP11]], ptr [[TMP0]], align 1, !tbaa [[TBAA4]]
 ; CHECK-NEXT:    ret void
 ;
-  %4 = load i8, i8* %0, align 1, !tbaa !6
+  %4 = load i8, ptr %0, align 1, !tbaa !6
   %5 = zext i8 %4 to i32
   %6 = mul i32 %5, %1
   %7 = lshr i32 %6, 15
   %8 = icmp ult i32 %7, 255
   %9 = select i1 %8, i32 %7, i32 255
   %10 = trunc i32 %9 to i8
-  store i8 %10, i8* %0, align 1, !tbaa !6
-  %11 = getelementptr inbounds i8, i8* %0, i64 1
-  %12 = load i8, i8* %11, align 1, !tbaa !6
+  store i8 %10, ptr %0, align 1, !tbaa !6
+  %11 = getelementptr inbounds i8, ptr %0, i64 1
+  %12 = load i8, ptr %11, align 1, !tbaa !6
   %13 = zext i8 %12 to i32
   %14 = mul i32 %13, %1
   %15 = lshr i32 %14, 15
   %16 = icmp ult i32 %15, 255
   %17 = select i1 %16, i32 %15, i32 255
   %18 = trunc i32 %17 to i8
-  store i8 %18, i8* %11, align 1, !tbaa !6
-  %19 = getelementptr inbounds i8, i8* %0, i64 2
-  %20 = load i8, i8* %19, align 1, !tbaa !6
+  store i8 %18, ptr %11, align 1, !tbaa !6
+  %19 = getelementptr inbounds i8, ptr %0, i64 2
+  %20 = load i8, ptr %19, align 1, !tbaa !6
   %21 = zext i8 %20 to i32
   %22 = mul i32 %21, %1
   %23 = lshr i32 %22, 15
   %24 = icmp ult i32 %23, 255
   %25 = select i1 %24, i32 %23, i32 255
   %26 = trunc i32 %25 to i8
-  store i8 %26, i8* %19, align 1, !tbaa !6
-  %27 = getelementptr inbounds i8, i8* %0, i64 3
-  %28 = load i8, i8* %27, align 1, !tbaa !6
+  store i8 %26, ptr %19, align 1, !tbaa !6
+  %27 = getelementptr inbounds i8, ptr %0, i64 3
+  %28 = load i8, ptr %27, align 1, !tbaa !6
   %29 = zext i8 %28 to i32
   %30 = mul i32 %29, %1
   %31 = lshr i32 %30, 15
   %32 = icmp ult i32 %31, 255
   %33 = select i1 %32, i32 %31, i32 255
   %34 = trunc i32 %33 to i8
-  store i8 %34, i8* %27, align 1, !tbaa !6
+  store i8 %34, ptr %27, align 1, !tbaa !6
   ret void
 }
 
-define void @store_i64(i64* nocapture %0, i32 %1, i32 %2) {
+define void @store_i64(ptr nocapture %0, i32 %1, i32 %2) {
 ; SSE-LABEL: @store_i64(
 ; SSE-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP1:%.*]] to i64
-; SSE-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP0:%.*]], align 8, !tbaa [[TBAA5:![0-9]+]]
+; SSE-NEXT:    [[TMP5:%.*]] = load i64, ptr [[TMP0:%.*]], align 8, !tbaa [[TBAA5:![0-9]+]]
 ; SSE-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], [[TMP4]]
 ; SSE-NEXT:    [[TMP7:%.*]] = lshr i64 [[TMP6]], 15
 ; SSE-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
 ; SSE-NEXT:    [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 255
 ; SSE-NEXT:    [[TMP10:%.*]] = and i64 [[TMP7]], 4294967295
 ; SSE-NEXT:    [[TMP11:%.*]] = select i1 [[TMP9]], i64 [[TMP10]], i64 255
-; SSE-NEXT:    store i64 [[TMP11]], i64* [[TMP0]], align 8, !tbaa [[TBAA5]]
-; SSE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 1
-; SSE-NEXT:    [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8, !tbaa [[TBAA5]]
+; SSE-NEXT:    store i64 [[TMP11]], ptr [[TMP0]], align 8, !tbaa [[TBAA5]]
+; SSE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 1
+; SSE-NEXT:    [[TMP13:%.*]] = load i64, ptr [[TMP12]], align 8, !tbaa [[TBAA5]]
 ; SSE-NEXT:    [[TMP14:%.*]] = mul i64 [[TMP13]], [[TMP4]]
 ; SSE-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP14]], 15
 ; SSE-NEXT:    [[TMP16:%.*]] = trunc i64 [[TMP15]] to i32
 ; SSE-NEXT:    [[TMP17:%.*]] = icmp ult i32 [[TMP16]], 255
 ; SSE-NEXT:    [[TMP18:%.*]] = and i64 [[TMP15]], 4294967295
 ; SSE-NEXT:    [[TMP19:%.*]] = select i1 [[TMP17]], i64 [[TMP18]], i64 255
-; SSE-NEXT:    store i64 [[TMP19]], i64* [[TMP12]], align 8, !tbaa [[TBAA5]]
-; SSE-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 2
-; SSE-NEXT:    [[TMP21:%.*]] = load i64, i64* [[TMP20]], align 8, !tbaa [[TBAA5]]
+; SSE-NEXT:    store i64 [[TMP19]], ptr [[TMP12]], align 8, !tbaa [[TBAA5]]
+; SSE-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 2
+; SSE-NEXT:    [[TMP21:%.*]] = load i64, ptr [[TMP20]], align 8, !tbaa [[TBAA5]]
 ; SSE-NEXT:    [[TMP22:%.*]] = mul i64 [[TMP21]], [[TMP4]]
 ; SSE-NEXT:    [[TMP23:%.*]] = lshr i64 [[TMP22]], 15
 ; SSE-NEXT:    [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32
 ; SSE-NEXT:    [[TMP25:%.*]] = icmp ult i32 [[TMP24]], 255
 ; SSE-NEXT:    [[TMP26:%.*]] = and i64 [[TMP23]], 4294967295
 ; SSE-NEXT:    [[TMP27:%.*]] = select i1 [[TMP25]], i64 [[TMP26]], i64 255
-; SSE-NEXT:    store i64 [[TMP27]], i64* [[TMP20]], align 8, !tbaa [[TBAA5]]
-; SSE-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 3
-; SSE-NEXT:    [[TMP29:%.*]] = load i64, i64* [[TMP28]], align 8, !tbaa [[TBAA5]]
+; SSE-NEXT:    store i64 [[TMP27]], ptr [[TMP20]], align 8, !tbaa [[TBAA5]]
+; SSE-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 3
+; SSE-NEXT:    [[TMP29:%.*]] = load i64, ptr [[TMP28]], align 8, !tbaa [[TBAA5]]
 ; SSE-NEXT:    [[TMP30:%.*]] = mul i64 [[TMP29]], [[TMP4]]
 ; SSE-NEXT:    [[TMP31:%.*]] = lshr i64 [[TMP30]], 15
 ; SSE-NEXT:    [[TMP32:%.*]] = trunc i64 [[TMP31]] to i32
 ; SSE-NEXT:    [[TMP33:%.*]] = icmp ult i32 [[TMP32]], 255
 ; SSE-NEXT:    [[TMP34:%.*]] = and i64 [[TMP31]], 4294967295
 ; SSE-NEXT:    [[TMP35:%.*]] = select i1 [[TMP33]], i64 [[TMP34]], i64 255
-; SSE-NEXT:    store i64 [[TMP35]], i64* [[TMP28]], align 8, !tbaa [[TBAA5]]
+; SSE-NEXT:    store i64 [[TMP35]], ptr [[TMP28]], align 8, !tbaa [[TBAA5]]
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @store_i64(
 ; AVX-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP1:%.*]] to i64
-; AVX-NEXT:    [[TMP5:%.*]] = bitcast i64* [[TMP0:%.*]] to <4 x i64>*
-; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i64>, <4 x i64>* [[TMP5]], align 8, !tbaa [[TBAA5:![0-9]+]]
+; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 8, !tbaa [[TBAA5:![0-9]+]]
 ; AVX-NEXT:    [[TMP7:%.*]] = insertelement <4 x i64> poison, i64 [[TMP4]], i64 0
 ; AVX-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; AVX-NEXT:    [[TMP8:%.*]] = mul <4 x i64> [[TMP6]], [[SHUFFLE]]
@@ -153,46 +148,45 @@ define void @store_i64(i64* nocapture %0, i32 %1, i32 %2) {
 ; AVX-NEXT:    [[TMP11:%.*]] = icmp ult <4 x i32> [[TMP10]], <i32 255, i32 255, i32 255, i32 255>
 ; AVX-NEXT:    [[TMP12:%.*]] = and <4 x i64> [[TMP9]], <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
 ; AVX-NEXT:    [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i64> [[TMP12]], <4 x i64> <i64 255, i64 255, i64 255, i64 255>
-; AVX-NEXT:    [[TMP14:%.*]] = bitcast i64* [[TMP0]] to <4 x i64>*
-; AVX-NEXT:    store <4 x i64> [[TMP13]], <4 x i64>* [[TMP14]], align 8, !tbaa [[TBAA5]]
+; AVX-NEXT:    store <4 x i64> [[TMP13]], ptr [[TMP0]], align 8, !tbaa [[TBAA5]]
 ; AVX-NEXT:    ret void
 ;
   %4 = zext i32 %1 to i64
-  %5 = load i64, i64* %0, align 8, !tbaa !7
+  %5 = load i64, ptr %0, align 8, !tbaa !7
   %6 = mul i64 %5, %4
   %7 = lshr i64 %6, 15
   %8 = trunc i64 %7 to i32
   %9 = icmp ult i32 %8, 255
   %10 = and i64 %7, 4294967295
   %11 = select i1 %9, i64 %10, i64 255
-  store i64 %11, i64* %0, align 8, !tbaa !7
-  %12 = getelementptr inbounds i64, i64* %0, i64 1
-  %13 = load i64, i64* %12, align 8, !tbaa !7
+  store i64 %11, ptr %0, align 8, !tbaa !7
+  %12 = getelementptr inbounds i64, ptr %0, i64 1
+  %13 = load i64, ptr %12, align 8, !tbaa !7
   %14 = mul i64 %13, %4
   %15 = lshr i64 %14, 15
   %16 = trunc i64 %15 to i32
   %17 = icmp ult i32 %16, 255
   %18 = and i64 %15, 4294967295
   %19 = select i1 %17, i64 %18, i64 255
-  store i64 %19, i64* %12, align 8, !tbaa !7
-  %20 = getelementptr inbounds i64, i64* %0, i64 2
-  %21 = load i64, i64* %20, align 8, !tbaa !7
+  store i64 %19, ptr %12, align 8, !tbaa !7
+  %20 = getelementptr inbounds i64, ptr %0, i64 2
+  %21 = load i64, ptr %20, align 8, !tbaa !7
   %22 = mul i64 %21, %4
   %23 = lshr i64 %22, 15
   %24 = trunc i64 %23 to i32
   %25 = icmp ult i32 %24, 255
   %26 = and i64 %23, 4294967295
   %27 = select i1 %25, i64 %26, i64 255
-  store i64 %27, i64* %20, align 8, !tbaa !7
-  %28 = getelementptr inbounds i64, i64* %0, i64 3
-  %29 = load i64, i64* %28, align 8, !tbaa !7
+  store i64 %27, ptr %20, align 8, !tbaa !7
+  %28 = getelementptr inbounds i64, ptr %0, i64 3
+  %29 = load i64, ptr %28, align 8, !tbaa !7
   %30 = mul i64 %29, %4
   %31 = lshr i64 %30, 15
   %32 = trunc i64 %31 to i32
   %33 = icmp ult i32 %32, 255
   %34 = and i64 %31, 4294967295
   %35 = select i1 %33, i64 %34, i64 255
-  store i64 %35, i64* %28, align 8, !tbaa !7
+  store i64 %35, ptr %28, align 8, !tbaa !7
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47623.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47623.ll
index 7d4bf0f08351d..c46a5aa758fb3 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr47623.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47623.ll
@@ -11,45 +11,45 @@
 
 define void @foo() {
 ; SSE-LABEL: @foo(
-; SSE-NEXT:    [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 0, i64 0), align 16
-; SSE-NEXT:    store i32 [[TMP1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @a, i64 0, i64 0), align 16
-; SSE-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 0, i64 2), align 8
-; SSE-NEXT:    store i32 [[TMP2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @a, i64 0, i64 1), align 4
-; SSE-NEXT:    store i32 [[TMP1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @a, i64 0, i64 2), align 8
-; SSE-NEXT:    store i32 [[TMP2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @a, i64 0, i64 3), align 4
-; SSE-NEXT:    store i32 [[TMP1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @a, i64 0, i64 4), align 16
-; SSE-NEXT:    store i32 [[TMP2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @a, i64 0, i64 5), align 4
-; SSE-NEXT:    store i32 [[TMP1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @a, i64 0, i64 6), align 8
-; SSE-NEXT:    store i32 [[TMP2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @a, i64 0, i64 7), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load i32, ptr @b, align 16
+; SSE-NEXT:    store i32 [[TMP1]], ptr @a, align 16
+; SSE-NEXT:    [[TMP2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @b, i64 0, i64 2), align 8
+; SSE-NEXT:    store i32 [[TMP2]], ptr getelementptr inbounds ([8 x i32], ptr @a, i64 0, i64 1), align 4
+; SSE-NEXT:    store i32 [[TMP1]], ptr getelementptr inbounds ([8 x i32], ptr @a, i64 0, i64 2), align 8
+; SSE-NEXT:    store i32 [[TMP2]], ptr getelementptr inbounds ([8 x i32], ptr @a, i64 0, i64 3), align 4
+; SSE-NEXT:    store i32 [[TMP1]], ptr getelementptr inbounds ([8 x i32], ptr @a, i64 0, i64 4), align 16
+; SSE-NEXT:    store i32 [[TMP2]], ptr getelementptr inbounds ([8 x i32], ptr @a, i64 0, i64 5), align 4
+; SSE-NEXT:    store i32 [[TMP1]], ptr getelementptr inbounds ([8 x i32], ptr @a, i64 0, i64 6), align 8
+; SSE-NEXT:    store i32 [[TMP2]], ptr getelementptr inbounds ([8 x i32], ptr @a, i64 0, i64 7), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @foo(
-; AVX-NEXT:    [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 0, i64 0), align 16
-; AVX-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 0, i64 2), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load i32, ptr @b, align 16
+; AVX-NEXT:    [[TMP2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @b, i64 0, i64 2), align 8
 ; AVX-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i64 0
 ; AVX-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP2]], i64 1
 ; AVX-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
-; AVX-NEXT:    store <8 x i32> [[SHUFFLE]], <8 x i32>* bitcast ([8 x i32]* @a to <8 x i32>*), align 16
+; AVX-NEXT:    store <8 x i32> [[SHUFFLE]], ptr @a, align 16
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @foo(
-; AVX512-NEXT:    [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 0, i64 0), align 16
-; AVX512-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 0, i64 2), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load i32, ptr @b, align 16
+; AVX512-NEXT:    [[TMP2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @b, i64 0, i64 2), align 8
 ; AVX512-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i64 0
 ; AVX512-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP2]], i64 1
 ; AVX512-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
-; AVX512-NEXT:    store <8 x i32> [[SHUFFLE]], <8 x i32>* bitcast ([8 x i32]* @a to <8 x i32>*), align 16
+; AVX512-NEXT:    store <8 x i32> [[SHUFFLE]], ptr @a, align 16
 ; AVX512-NEXT:    ret void
 ;
-  %1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 0, i64 0), align 16
-  store i32 %1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @a, i64 0, i64 0), align 16
-  %2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i64 0, i64 2), align 8
-  store i32 %2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @a, i64 0, i64 1), align 4
-  store i32 %1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @a, i64 0, i64 2), align 8
-  store i32 %2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @a, i64 0, i64 3), align 4
-  store i32 %1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @a, i64 0, i64 4), align 16
-  store i32 %2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @a, i64 0, i64 5), align 4
-  store i32 %1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @a, i64 0, i64 6), align 8
-  store i32 %2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @a, i64 0, i64 7), align 4
+  %1 = load i32, ptr @b, align 16
+  store i32 %1, ptr @a, align 16
+  %2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @b, i64 0, i64 2), align 8
+  store i32 %2, ptr getelementptr inbounds ([8 x i32], ptr @a, i64 0, i64 1), align 4
+  store i32 %1, ptr getelementptr inbounds ([8 x i32], ptr @a, i64 0, i64 2), align 8
+  store i32 %2, ptr getelementptr inbounds ([8 x i32], ptr @a, i64 0, i64 3), align 4
+  store i32 %1, ptr getelementptr inbounds ([8 x i32], ptr @a, i64 0, i64 4), align 16
+  store i32 %2, ptr getelementptr inbounds ([8 x i32], ptr @a, i64 0, i64 5), align 4
+  store i32 %1, ptr getelementptr inbounds ([8 x i32], ptr @a, i64 0, i64 6), align 8
+  store i32 %2, ptr getelementptr inbounds ([8 x i32], ptr @a, i64 0, i64 7), align 4
   ret void
 }

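pr47623.ll above shows the same folding for constant expressions: a (0, 0)-index GEP of a global collapses to the global itself. The pr47629 tests that follow additionally pick up the opaque-pointer intrinsic mangling, where @llvm.masked.gather.v4i32.v4p0i32 on <4 x i32*> becomes @llvm.masked.gather.v4i32.v4p0 on <4 x ptr>. A sketch of the constant-GEP fold (hypothetical global @g, not from any one test):

  ; before: constant-expression GEP at index (0, 0) into the global
  %x = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @g, i64 0, i64 0), align 16

  ; after: the zero-index constant GEP folds to the global itself
  %x = load i32, ptr @g, align 16
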
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll
index b96149ad01abb..3850b42327857 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll
@@ -5,273 +5,263 @@
 ; RUN:  opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx512f  | FileCheck %s --check-prefixes=AVX512F
 ; RUN:  opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512VL
 
-define void @gather_load(i32* noalias nocapture %0, i32* noalias nocapture readonly %1) {
+define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
 ; SSE-LABEL: @gather_load(
-; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 1
-; SSE-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 11
-; SSE-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4
-; SSE-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
+; SSE-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11
+; SSE-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
+; SSE-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
 ; SSE-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1
 ; SSE-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2
 ; SSE-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3
 ; SSE-NEXT:    [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
-; SSE-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>*
-; SSE-NEXT:    store <4 x i32> [[TMP14]], <4 x i32>* [[TMP15]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @gather_load(
-; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 1
-; AVX-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 11
-; AVX-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4
-; AVX-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
+; AVX-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11
+; AVX-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
+; AVX-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
 ; AVX-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1
 ; AVX-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2
 ; AVX-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3
 ; AVX-NEXT:    [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
-; AVX-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>*
-; AVX-NEXT:    store <4 x i32> [[TMP14]], <4 x i32>* [[TMP15]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    ret void
 ;
 ; AVX2-LABEL: @gather_load(
-; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 1
-; AVX2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 11
-; AVX2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4
-; AVX2-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
+; AVX2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11
+; AVX2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
+; AVX2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
 ; AVX2-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1
 ; AVX2-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2
 ; AVX2-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3
 ; AVX2-NEXT:    [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
-; AVX2-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>*
-; AVX2-NEXT:    store <4 x i32> [[TMP14]], <4 x i32>* [[TMP15]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512F-LABEL: @gather_load(
-; AVX512F-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 1
-; AVX512F-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; AVX512F-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 11
-; AVX512F-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX512F-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4
-; AVX512F-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX512F-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
+; AVX512F-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; AVX512F-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11
+; AVX512F-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
+; AVX512F-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
 ; AVX512F-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1
 ; AVX512F-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2
 ; AVX512F-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3
 ; AVX512F-NEXT:    [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
-; AVX512F-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>*
-; AVX512F-NEXT:    store <4 x i32> [[TMP14]], <4 x i32>* [[TMP15]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT:    store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    ret void
 ;
 ; AVX512VL-LABEL: @gather_load(
-; AVX512VL-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32*> poison, i32* [[TMP1:%.*]], i64 0
-; AVX512VL-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32*> [[TMP3]], <4 x i32*> poison, <4 x i32> zeroinitializer
-; AVX512VL-NEXT:    [[TMP4:%.*]] = getelementptr i32, <4 x i32*> [[SHUFFLE]], <4 x i64> <i64 0, i64 11, i64 4, i64 1>
-; AVX512VL-NEXT:    [[TMP5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP4]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison), !tbaa [[TBAA0:![0-9]+]]
+; AVX512VL-NEXT:    [[TMP3:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1:%.*]], i64 0
+; AVX512VL-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer
+; AVX512VL-NEXT:    [[TMP4:%.*]] = getelementptr i32, <4 x ptr> [[SHUFFLE]], <4 x i64> <i64 0, i64 11, i64 4, i64 1>
+; AVX512VL-NEXT:    [[TMP5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP4]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison), !tbaa [[TBAA0:![0-9]+]]
 ; AVX512VL-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], <i32 1, i32 2, i32 3, i32 4>
-; AVX512VL-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>*
-; AVX512VL-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX512VL-NEXT:    ret void
 ;
-  %3 = getelementptr inbounds i32, i32* %1, i64 1
-  %4 = load i32, i32* %1, align 4, !tbaa !2
-  %5 = getelementptr inbounds i32, i32* %0, i64 1
-  %6 = getelementptr inbounds i32, i32* %1, i64 11
-  %7 = load i32, i32* %6, align 4, !tbaa !2
-  %8 = getelementptr inbounds i32, i32* %0, i64 2
-  %9 = getelementptr inbounds i32, i32* %1, i64 4
-  %10 = load i32, i32* %9, align 4, !tbaa !2
-  %11 = getelementptr inbounds i32, i32* %0, i64 3
-  %12 = load i32, i32* %3, align 4, !tbaa !2
+  %3 = getelementptr inbounds i32, ptr %1, i64 1
+  %4 = load i32, ptr %1, align 4, !tbaa !2
+  %5 = getelementptr inbounds i32, ptr %0, i64 1
+  %6 = getelementptr inbounds i32, ptr %1, i64 11
+  %7 = load i32, ptr %6, align 4, !tbaa !2
+  %8 = getelementptr inbounds i32, ptr %0, i64 2
+  %9 = getelementptr inbounds i32, ptr %1, i64 4
+  %10 = load i32, ptr %9, align 4, !tbaa !2
+  %11 = getelementptr inbounds i32, ptr %0, i64 3
+  %12 = load i32, ptr %3, align 4, !tbaa !2
   %13 = insertelement <4 x i32> poison, i32 %4, i32 0
   %14 = insertelement <4 x i32> %13, i32 %7, i32 1
   %15 = insertelement <4 x i32> %14, i32 %10, i32 2
   %16 = insertelement <4 x i32> %15, i32 %12, i32 3
   %17 = add nsw <4 x i32> %16, <i32 1, i32 2, i32 3, i32 4>
-  %18 = bitcast i32* %0 to <4 x i32>*
-  store <4 x i32> %17, <4 x i32>* %18, align 4, !tbaa !2
+  store <4 x i32> %17, ptr %0, align 4, !tbaa !2
   ret void
 }
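
[The recurring simplification in the hunks above is the removal of the pointer bitcast that typed pointers forced before every vectorized store. A minimal sketch of the pattern, reduced from the checks above; function and value names are illustrative, and the two forms are shown side by side only for comparison (a real module uses one pointer mode or the other):

    ; Before: with typed pointers, storing a <4 x i32> through an i32*
    ; needs an explicit pointer bitcast first.
    define void @store_v4_typed(i32* %p, <4 x i32> %v) {
      %vp = bitcast i32* %p to <4 x i32>*
      store <4 x i32> %v, <4 x i32>* %vp, align 4
      ret void
    }

    ; After: an opaque ptr carries no pointee type, so the vector store
    ; uses the incoming pointer directly and the bitcast disappears.
    define void @store_v4_opaque(ptr %p, <4 x i32> %v) {
      store <4 x i32> %v, ptr %p, align 4
      ret void
    }

This is why each AVX/AVX2/AVX512 hunk loses exactly one instruction and one check line: the final bitcast-plus-store pair collapses into a single store.]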
 
-define void @gather_load_2(i32* noalias nocapture %0, i32* noalias nocapture readonly %1) {
+define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
 ; SSE-LABEL: @gather_load_2(
-; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 1
-; SSE-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
+; SSE-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP5:%.*]] = add nsw i32 [[TMP4]], 1
-; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 1
-; SSE-NEXT:    store i32 [[TMP5]], i32* [[TMP0]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 10
-; SSE-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1
+; SSE-NEXT:    store i32 [[TMP5]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 10
+; SSE-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP9:%.*]] = add nsw i32 [[TMP8]], 2
-; SSE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 2
-; SSE-NEXT:    store i32 [[TMP9]], i32* [[TMP6]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 3
-; SSE-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2
+; SSE-NEXT:    store i32 [[TMP9]], ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 3
+; SSE-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP13:%.*]] = add nsw i32 [[TMP12]], 3
-; SSE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 3
-; SSE-NEXT:    store i32 [[TMP13]], i32* [[TMP10]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 5
-; SSE-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3
+; SSE-NEXT:    store i32 [[TMP13]], ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 5
+; SSE-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP17:%.*]] = add nsw i32 [[TMP16]], 4
-; SSE-NEXT:    store i32 [[TMP17]], i32* [[TMP14]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store i32 [[TMP17]], ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @gather_load_2(
-; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 1
-; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 10
-; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 3
-; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 5
-; AVX-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
+; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 10
+; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 3
+; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 5
+; AVX-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0
 ; AVX-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 1
 ; AVX-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 2
 ; AVX-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3
 ; AVX-NEXT:    [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 1, i32 2, i32 3, i32 4>
-; AVX-NEXT:    [[TMP16:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>*
-; AVX-NEXT:    store <4 x i32> [[TMP15]], <4 x i32>* [[TMP16]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    ret void
 ;
 ; AVX2-LABEL: @gather_load_2(
-; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 1
-; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 10
-; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 3
-; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 5
-; AVX2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
+; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 10
+; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 3
+; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 5
+; AVX2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0
 ; AVX2-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 1
 ; AVX2-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 2
 ; AVX2-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3
 ; AVX2-NEXT:    [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 1, i32 2, i32 3, i32 4>
-; AVX2-NEXT:    [[TMP16:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>*
-; AVX2-NEXT:    store <4 x i32> [[TMP15]], <4 x i32>* [[TMP16]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512F-LABEL: @gather_load_2(
-; AVX512F-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 1
-; AVX512F-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 10
-; AVX512F-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 3
-; AVX512F-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 5
-; AVX512F-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]]
-; AVX512F-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4, !tbaa [[TBAA0]]
-; AVX512F-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX512F-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
+; AVX512F-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 10
+; AVX512F-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 3
+; AVX512F-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 5
+; AVX512F-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0
 ; AVX512F-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 1
 ; AVX512F-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 2
 ; AVX512F-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3
 ; AVX512F-NEXT:    [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 1, i32 2, i32 3, i32 4>
-; AVX512F-NEXT:    [[TMP16:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>*
-; AVX512F-NEXT:    store <4 x i32> [[TMP15]], <4 x i32>* [[TMP16]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT:    store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    ret void
 ;
 ; AVX512VL-LABEL: @gather_load_2(
-; AVX512VL-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32*> poison, i32* [[TMP1:%.*]], i64 0
-; AVX512VL-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32*> [[TMP3]], <4 x i32*> poison, <4 x i32> zeroinitializer
-; AVX512VL-NEXT:    [[TMP4:%.*]] = getelementptr i32, <4 x i32*> [[SHUFFLE]], <4 x i64> <i64 1, i64 10, i64 3, i64 5>
-; AVX512VL-NEXT:    [[TMP5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP4]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison), !tbaa [[TBAA0]]
+; AVX512VL-NEXT:    [[TMP3:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1:%.*]], i64 0
+; AVX512VL-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer
+; AVX512VL-NEXT:    [[TMP4:%.*]] = getelementptr i32, <4 x ptr> [[SHUFFLE]], <4 x i64> <i64 1, i64 10, i64 3, i64 5>
+; AVX512VL-NEXT:    [[TMP5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP4]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison), !tbaa [[TBAA0]]
 ; AVX512VL-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], <i32 1, i32 2, i32 3, i32 4>
-; AVX512VL-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>*
-; AVX512VL-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX512VL-NEXT:    ret void
 ;
-  %3 = getelementptr inbounds i32, i32* %1, i64 1
-  %4 = load i32, i32* %3, align 4, !tbaa !2
+  %3 = getelementptr inbounds i32, ptr %1, i64 1
+  %4 = load i32, ptr %3, align 4, !tbaa !2
   %5 = add nsw i32 %4, 1
-  %6 = getelementptr inbounds i32, i32* %0, i64 1
-  store i32 %5, i32* %0, align 4, !tbaa !2
-  %7 = getelementptr inbounds i32, i32* %1, i64 10
-  %8 = load i32, i32* %7, align 4, !tbaa !2
+  %6 = getelementptr inbounds i32, ptr %0, i64 1
+  store i32 %5, ptr %0, align 4, !tbaa !2
+  %7 = getelementptr inbounds i32, ptr %1, i64 10
+  %8 = load i32, ptr %7, align 4, !tbaa !2
   %9 = add nsw i32 %8, 2
-  %10 = getelementptr inbounds i32, i32* %0, i64 2
-  store i32 %9, i32* %6, align 4, !tbaa !2
-  %11 = getelementptr inbounds i32, i32* %1, i64 3
-  %12 = load i32, i32* %11, align 4, !tbaa !2
+  %10 = getelementptr inbounds i32, ptr %0, i64 2
+  store i32 %9, ptr %6, align 4, !tbaa !2
+  %11 = getelementptr inbounds i32, ptr %1, i64 3
+  %12 = load i32, ptr %11, align 4, !tbaa !2
   %13 = add nsw i32 %12, 3
-  %14 = getelementptr inbounds i32, i32* %0, i64 3
-  store i32 %13, i32* %10, align 4, !tbaa !2
-  %15 = getelementptr inbounds i32, i32* %1, i64 5
-  %16 = load i32, i32* %15, align 4, !tbaa !2
+  %14 = getelementptr inbounds i32, ptr %0, i64 3
+  store i32 %13, ptr %10, align 4, !tbaa !2
+  %15 = getelementptr inbounds i32, ptr %1, i64 5
+  %16 = load i32, ptr %15, align 4, !tbaa !2
   %17 = add nsw i32 %16, 4
-  store i32 %17, i32* %14, align 4, !tbaa !2
+  store i32 %17, ptr %14, align 4, !tbaa !2
   ret void
 }
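
[The AVX512VL hunks also show how the masked-gather intrinsic's mangled overload name changes: the pointee type (p0i32) drops out of the suffix, leaving only the address space (p0). The corresponding declarations, as a sketch rather than part of the diff:

    ; Typed pointers: the overload suffix encodes the pointee type.
    declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32 immarg, <4 x i1>, <4 x i32>)

    ; Opaque pointers: only the address space remains in the suffix.
    declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32 immarg, <4 x i1>, <4 x i32>)

The call sites in the test change in lockstep, which is why both the intrinsic name and its <4 x ptr> argument type are updated on the same check line.]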
 
 
-define void @gather_load_3(i32* noalias nocapture %0, i32* noalias nocapture readonly %1) {
+define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
 ; SSE-LABEL: @gather_load_3(
-; SSE-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP4:%.*]] = add i32 [[TMP3]], 1
-; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 1
-; SSE-NEXT:    store i32 [[TMP4]], i32* [[TMP0]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 11
-; SSE-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1
+; SSE-NEXT:    store i32 [[TMP4]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11
+; SSE-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP8:%.*]] = add i32 [[TMP7]], 2
-; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 2
-; SSE-NEXT:    store i32 [[TMP8]], i32* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4
-; SSE-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2
+; SSE-NEXT:    store i32 [[TMP8]], ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
+; SSE-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP12:%.*]] = add i32 [[TMP11]], 3
-; SSE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 3
-; SSE-NEXT:    store i32 [[TMP12]], i32* [[TMP9]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 15
-; SSE-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3
+; SSE-NEXT:    store i32 [[TMP12]], ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 15
+; SSE-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP16:%.*]] = add i32 [[TMP15]], 4
-; SSE-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 4
-; SSE-NEXT:    store i32 [[TMP16]], i32* [[TMP13]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 18
-; SSE-NEXT:    [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
+; SSE-NEXT:    store i32 [[TMP16]], ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 18
+; SSE-NEXT:    [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP20:%.*]] = add i32 [[TMP19]], 1
-; SSE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 5
-; SSE-NEXT:    store i32 [[TMP20]], i32* [[TMP17]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 9
-; SSE-NEXT:    [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 5
+; SSE-NEXT:    store i32 [[TMP20]], ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 9
+; SSE-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP24:%.*]] = add i32 [[TMP23]], 2
-; SSE-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 6
-; SSE-NEXT:    store i32 [[TMP24]], i32* [[TMP21]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 6
-; SSE-NEXT:    [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 6
+; SSE-NEXT:    store i32 [[TMP24]], ptr [[TMP21]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 6
+; SSE-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP28:%.*]] = add i32 [[TMP27]], 3
-; SSE-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 7
-; SSE-NEXT:    store i32 [[TMP28]], i32* [[TMP25]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 21
-; SSE-NEXT:    [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 7
+; SSE-NEXT:    store i32 [[TMP28]], ptr [[TMP25]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 21
+; SSE-NEXT:    [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP32:%.*]] = add i32 [[TMP31]], 4
-; SSE-NEXT:    store i32 [[TMP32]], i32* [[TMP29]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store i32 [[TMP32]], ptr [[TMP29]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @gather_load_3(
-; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 11
-; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4
-; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 15
-; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 18
-; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 9
-; AVX-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 6
-; AVX-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 21
-; AVX-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP8]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP17:%.*]] = load i32, i32* [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 11
+; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
+; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 15
+; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 18
+; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 9
+; AVX-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 6
+; AVX-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 21
+; AVX-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[TMP18:%.*]] = insertelement <8 x i32> poison, i32 [[TMP10]], i64 0
 ; AVX-NEXT:    [[TMP19:%.*]] = insertelement <8 x i32> [[TMP18]], i32 [[TMP11]], i64 1
 ; AVX-NEXT:    [[TMP20:%.*]] = insertelement <8 x i32> [[TMP19]], i32 [[TMP12]], i64 2
@@ -281,26 +271,25 @@ define void @gather_load_3(i32* noalias nocapture %0, i32* noalias nocapture rea
 ; AVX-NEXT:    [[TMP24:%.*]] = insertelement <8 x i32> [[TMP23]], i32 [[TMP16]], i64 6
 ; AVX-NEXT:    [[TMP25:%.*]] = insertelement <8 x i32> [[TMP24]], i32 [[TMP17]], i64 7
 ; AVX-NEXT:    [[TMP26:%.*]] = add <8 x i32> [[TMP25]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
-; AVX-NEXT:    [[TMP27:%.*]] = bitcast i32* [[TMP0:%.*]] to <8 x i32>*
-; AVX-NEXT:    store <8 x i32> [[TMP26]], <8 x i32>* [[TMP27]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    store <8 x i32> [[TMP26]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    ret void
 ;
 ; AVX2-LABEL: @gather_load_3(
-; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 11
-; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4
-; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 15
-; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 18
-; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 9
-; AVX2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 6
-; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 21
-; AVX2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP8]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP17:%.*]] = load i32, i32* [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 11
+; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
+; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 15
+; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 18
+; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 9
+; AVX2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 6
+; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 21
+; AVX2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[TMP18:%.*]] = insertelement <8 x i32> poison, i32 [[TMP10]], i64 0
 ; AVX2-NEXT:    [[TMP19:%.*]] = insertelement <8 x i32> [[TMP18]], i32 [[TMP11]], i64 1
 ; AVX2-NEXT:    [[TMP20:%.*]] = insertelement <8 x i32> [[TMP19]], i32 [[TMP12]], i64 2
@@ -310,95 +299,92 @@ define void @gather_load_3(i32* noalias nocapture %0, i32* noalias nocapture rea
 ; AVX2-NEXT:    [[TMP24:%.*]] = insertelement <8 x i32> [[TMP23]], i32 [[TMP16]], i64 6
 ; AVX2-NEXT:    [[TMP25:%.*]] = insertelement <8 x i32> [[TMP24]], i32 [[TMP17]], i64 7
 ; AVX2-NEXT:    [[TMP26:%.*]] = add <8 x i32> [[TMP25]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
-; AVX2-NEXT:    [[TMP27:%.*]] = bitcast i32* [[TMP0:%.*]] to <8 x i32>*
-; AVX2-NEXT:    store <8 x i32> [[TMP26]], <8 x i32>* [[TMP27]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    store <8 x i32> [[TMP26]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512F-LABEL: @gather_load_3(
-; AVX512F-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32*> poison, i32* [[TMP1:%.*]], i64 0
-; AVX512F-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32*> [[TMP3]], <8 x i32*> poison, <8 x i32> zeroinitializer
-; AVX512F-NEXT:    [[TMP4:%.*]] = getelementptr i32, <8 x i32*> [[SHUFFLE]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
-; AVX512F-NEXT:    [[TMP5:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[TMP4]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison), !tbaa [[TBAA0]]
+; AVX512F-NEXT:    [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0
+; AVX512F-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer
+; AVX512F-NEXT:    [[TMP4:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
+; AVX512F-NEXT:    [[TMP5:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison), !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    [[TMP6:%.*]] = add <8 x i32> [[TMP5]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
-; AVX512F-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP0:%.*]] to <8 x i32>*
-; AVX512F-NEXT:    store <8 x i32> [[TMP6]], <8 x i32>* [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT:    store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    ret void
 ;
 ; AVX512VL-LABEL: @gather_load_3(
-; AVX512VL-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32*> poison, i32* [[TMP1:%.*]], i64 0
-; AVX512VL-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32*> [[TMP3]], <8 x i32*> poison, <8 x i32> zeroinitializer
-; AVX512VL-NEXT:    [[TMP4:%.*]] = getelementptr i32, <8 x i32*> [[SHUFFLE]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
-; AVX512VL-NEXT:    [[TMP5:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[TMP4]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison), !tbaa [[TBAA0]]
+; AVX512VL-NEXT:    [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0
+; AVX512VL-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer
+; AVX512VL-NEXT:    [[TMP4:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
+; AVX512VL-NEXT:    [[TMP5:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison), !tbaa [[TBAA0]]
 ; AVX512VL-NEXT:    [[TMP6:%.*]] = add <8 x i32> [[TMP5]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
-; AVX512VL-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP0:%.*]] to <8 x i32>*
-; AVX512VL-NEXT:    store <8 x i32> [[TMP6]], <8 x i32>* [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT:    store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX512VL-NEXT:    ret void
 ;
-  %3 = load i32, i32* %1, align 4, !tbaa !2
+  %3 = load i32, ptr %1, align 4, !tbaa !2
   %4 = add i32 %3, 1
-  %5 = getelementptr inbounds i32, i32* %0, i64 1
-  store i32 %4, i32* %0, align 4, !tbaa !2
-  %6 = getelementptr inbounds i32, i32* %1, i64 11
-  %7 = load i32, i32* %6, align 4, !tbaa !2
+  %5 = getelementptr inbounds i32, ptr %0, i64 1
+  store i32 %4, ptr %0, align 4, !tbaa !2
+  %6 = getelementptr inbounds i32, ptr %1, i64 11
+  %7 = load i32, ptr %6, align 4, !tbaa !2
   %8 = add i32 %7, 2
-  %9 = getelementptr inbounds i32, i32* %0, i64 2
-  store i32 %8, i32* %5, align 4, !tbaa !2
-  %10 = getelementptr inbounds i32, i32* %1, i64 4
-  %11 = load i32, i32* %10, align 4, !tbaa !2
+  %9 = getelementptr inbounds i32, ptr %0, i64 2
+  store i32 %8, ptr %5, align 4, !tbaa !2
+  %10 = getelementptr inbounds i32, ptr %1, i64 4
+  %11 = load i32, ptr %10, align 4, !tbaa !2
   %12 = add i32 %11, 3
-  %13 = getelementptr inbounds i32, i32* %0, i64 3
-  store i32 %12, i32* %9, align 4, !tbaa !2
-  %14 = getelementptr inbounds i32, i32* %1, i64 15
-  %15 = load i32, i32* %14, align 4, !tbaa !2
+  %13 = getelementptr inbounds i32, ptr %0, i64 3
+  store i32 %12, ptr %9, align 4, !tbaa !2
+  %14 = getelementptr inbounds i32, ptr %1, i64 15
+  %15 = load i32, ptr %14, align 4, !tbaa !2
   %16 = add i32 %15, 4
-  %17 = getelementptr inbounds i32, i32* %0, i64 4
-  store i32 %16, i32* %13, align 4, !tbaa !2
-  %18 = getelementptr inbounds i32, i32* %1, i64 18
-  %19 = load i32, i32* %18, align 4, !tbaa !2
+  %17 = getelementptr inbounds i32, ptr %0, i64 4
+  store i32 %16, ptr %13, align 4, !tbaa !2
+  %18 = getelementptr inbounds i32, ptr %1, i64 18
+  %19 = load i32, ptr %18, align 4, !tbaa !2
   %20 = add i32 %19, 1
-  %21 = getelementptr inbounds i32, i32* %0, i64 5
-  store i32 %20, i32* %17, align 4, !tbaa !2
-  %22 = getelementptr inbounds i32, i32* %1, i64 9
-  %23 = load i32, i32* %22, align 4, !tbaa !2
+  %21 = getelementptr inbounds i32, ptr %0, i64 5
+  store i32 %20, ptr %17, align 4, !tbaa !2
+  %22 = getelementptr inbounds i32, ptr %1, i64 9
+  %23 = load i32, ptr %22, align 4, !tbaa !2
   %24 = add i32 %23, 2
-  %25 = getelementptr inbounds i32, i32* %0, i64 6
-  store i32 %24, i32* %21, align 4, !tbaa !2
-  %26 = getelementptr inbounds i32, i32* %1, i64 6
-  %27 = load i32, i32* %26, align 4, !tbaa !2
+  %25 = getelementptr inbounds i32, ptr %0, i64 6
+  store i32 %24, ptr %21, align 4, !tbaa !2
+  %26 = getelementptr inbounds i32, ptr %1, i64 6
+  %27 = load i32, ptr %26, align 4, !tbaa !2
   %28 = add i32 %27, 3
-  %29 = getelementptr inbounds i32, i32* %0, i64 7
-  store i32 %28, i32* %25, align 4, !tbaa !2
-  %30 = getelementptr inbounds i32, i32* %1, i64 21
-  %31 = load i32, i32* %30, align 4, !tbaa !2
+  %29 = getelementptr inbounds i32, ptr %0, i64 7
+  store i32 %28, ptr %25, align 4, !tbaa !2
+  %30 = getelementptr inbounds i32, ptr %1, i64 21
+  %31 = load i32, ptr %30, align 4, !tbaa !2
   %32 = add i32 %31, 4
-  store i32 %32, i32* %29, align 4, !tbaa !2
+  store i32 %32, ptr %29, align 4, !tbaa !2
   ret void
 }
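
[In the AVX512F/AVX512VL checks above, the gather addresses are built by splatting the scalar base pointer into a vector and applying one vector GEP; under opaque pointers only the GEP's element type still carries type information. A reduced, self-contained sketch of that address-formation idiom (names and the choice of indices mirror the checks but are illustrative):

    define <8 x i32> @gather_idiom(ptr %base) {
      ; Splat the base pointer into all eight lanes.
      %b = insertelement <8 x ptr> poison, ptr %base, i64 0
      %s = shufflevector <8 x ptr> %b, <8 x ptr> poison, <8 x i32> zeroinitializer
      ; One vector GEP produces all eight lane addresses at once.
      %addrs = getelementptr i32, <8 x ptr> %s, <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
      %g = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %addrs, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
      ret <8 x i32> %g
    }

    declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32 immarg, <8 x i1>, <8 x i32>)]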
 
-define void @gather_load_4(i32* noalias nocapture %t0, i32* noalias nocapture readonly %t1) {
+define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture readonly %t1) {
 ; SSE-LABEL: @gather_load_4(
-; SSE-NEXT:    [[T5:%.*]] = getelementptr inbounds i32, i32* [[T0:%.*]], i64 1
-; SSE-NEXT:    [[T6:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 11
-; SSE-NEXT:    [[T9:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 2
-; SSE-NEXT:    [[T10:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 4
-; SSE-NEXT:    [[T13:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 3
-; SSE-NEXT:    [[T14:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 15
-; SSE-NEXT:    [[T17:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 4
-; SSE-NEXT:    [[T18:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 18
-; SSE-NEXT:    [[T21:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 5
-; SSE-NEXT:    [[T22:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 9
-; SSE-NEXT:    [[T25:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 6
-; SSE-NEXT:    [[T26:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 6
-; SSE-NEXT:    [[T29:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 7
-; SSE-NEXT:    [[T30:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 21
-; SSE-NEXT:    [[T3:%.*]] = load i32, i32* [[T1]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[T7:%.*]] = load i32, i32* [[T6]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[T11:%.*]] = load i32, i32* [[T10]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[T15:%.*]] = load i32, i32* [[T14]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[T19:%.*]] = load i32, i32* [[T18]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[T23:%.*]] = load i32, i32* [[T22]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[T27:%.*]] = load i32, i32* [[T26]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[T31:%.*]] = load i32, i32* [[T30]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[T5:%.*]] = getelementptr inbounds i32, ptr [[T0:%.*]], i64 1
+; SSE-NEXT:    [[T6:%.*]] = getelementptr inbounds i32, ptr [[T1:%.*]], i64 11
+; SSE-NEXT:    [[T9:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 2
+; SSE-NEXT:    [[T10:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 4
+; SSE-NEXT:    [[T13:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 3
+; SSE-NEXT:    [[T14:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 15
+; SSE-NEXT:    [[T17:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 4
+; SSE-NEXT:    [[T18:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 18
+; SSE-NEXT:    [[T21:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 5
+; SSE-NEXT:    [[T22:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 9
+; SSE-NEXT:    [[T25:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 6
+; SSE-NEXT:    [[T26:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 6
+; SSE-NEXT:    [[T29:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 7
+; SSE-NEXT:    [[T30:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 21
+; SSE-NEXT:    [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[T4:%.*]] = add i32 [[T3]], 1
 ; SSE-NEXT:    [[T8:%.*]] = add i32 [[T7]], 2
 ; SSE-NEXT:    [[T12:%.*]] = add i32 [[T11]], 3
@@ -407,32 +393,32 @@ define void @gather_load_4(i32* noalias nocapture %t0, i32* noalias nocapture re
 ; SSE-NEXT:    [[T24:%.*]] = add i32 [[T23]], 2
 ; SSE-NEXT:    [[T28:%.*]] = add i32 [[T27]], 3
 ; SSE-NEXT:    [[T32:%.*]] = add i32 [[T31]], 4
-; SSE-NEXT:    store i32 [[T4]], i32* [[T0]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    store i32 [[T8]], i32* [[T5]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    store i32 [[T12]], i32* [[T9]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    store i32 [[T16]], i32* [[T13]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    store i32 [[T20]], i32* [[T17]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    store i32 [[T24]], i32* [[T21]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    store i32 [[T28]], i32* [[T25]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    store i32 [[T32]], i32* [[T29]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store i32 [[T4]], ptr [[T0]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store i32 [[T8]], ptr [[T5]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store i32 [[T12]], ptr [[T9]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store i32 [[T16]], ptr [[T13]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store i32 [[T20]], ptr [[T17]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store i32 [[T24]], ptr [[T21]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store i32 [[T28]], ptr [[T25]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store i32 [[T32]], ptr [[T29]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @gather_load_4(
-; AVX-NEXT:    [[T6:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 11
-; AVX-NEXT:    [[T10:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 4
-; AVX-NEXT:    [[T14:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 15
-; AVX-NEXT:    [[T18:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 18
-; AVX-NEXT:    [[T22:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 9
-; AVX-NEXT:    [[T26:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 6
-; AVX-NEXT:    [[T30:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 21
-; AVX-NEXT:    [[T3:%.*]] = load i32, i32* [[T1]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[T7:%.*]] = load i32, i32* [[T6]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[T11:%.*]] = load i32, i32* [[T10]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[T15:%.*]] = load i32, i32* [[T14]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[T19:%.*]] = load i32, i32* [[T18]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[T23:%.*]] = load i32, i32* [[T22]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[T27:%.*]] = load i32, i32* [[T26]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[T31:%.*]] = load i32, i32* [[T30]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[T6:%.*]] = getelementptr inbounds i32, ptr [[T1:%.*]], i64 11
+; AVX-NEXT:    [[T10:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 4
+; AVX-NEXT:    [[T14:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 15
+; AVX-NEXT:    [[T18:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 18
+; AVX-NEXT:    [[T22:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 9
+; AVX-NEXT:    [[T26:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 6
+; AVX-NEXT:    [[T30:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 21
+; AVX-NEXT:    [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[T3]], i64 0
 ; AVX-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[T7]], i64 1
 ; AVX-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[T11]], i64 2
@@ -442,26 +428,25 @@ define void @gather_load_4(i32* noalias nocapture %t0, i32* noalias nocapture re
 ; AVX-NEXT:    [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T27]], i64 6
 ; AVX-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[T31]], i64 7
 ; AVX-NEXT:    [[TMP9:%.*]] = add <8 x i32> [[TMP8]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
-; AVX-NEXT:    [[TMP10:%.*]] = bitcast i32* [[T0:%.*]] to <8 x i32>*
-; AVX-NEXT:    store <8 x i32> [[TMP9]], <8 x i32>* [[TMP10]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    store <8 x i32> [[TMP9]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    ret void
 ;
 ; AVX2-LABEL: @gather_load_4(
-; AVX2-NEXT:    [[T6:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 11
-; AVX2-NEXT:    [[T10:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 4
-; AVX2-NEXT:    [[T14:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 15
-; AVX2-NEXT:    [[T18:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 18
-; AVX2-NEXT:    [[T22:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 9
-; AVX2-NEXT:    [[T26:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 6
-; AVX2-NEXT:    [[T30:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 21
-; AVX2-NEXT:    [[T3:%.*]] = load i32, i32* [[T1]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[T7:%.*]] = load i32, i32* [[T6]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[T11:%.*]] = load i32, i32* [[T10]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[T15:%.*]] = load i32, i32* [[T14]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[T19:%.*]] = load i32, i32* [[T18]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[T23:%.*]] = load i32, i32* [[T22]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[T27:%.*]] = load i32, i32* [[T26]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[T31:%.*]] = load i32, i32* [[T30]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[T6:%.*]] = getelementptr inbounds i32, ptr [[T1:%.*]], i64 11
+; AVX2-NEXT:    [[T10:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 4
+; AVX2-NEXT:    [[T14:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 15
+; AVX2-NEXT:    [[T18:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 18
+; AVX2-NEXT:    [[T22:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 9
+; AVX2-NEXT:    [[T26:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 6
+; AVX2-NEXT:    [[T30:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 21
+; AVX2-NEXT:    [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[T3]], i64 0
 ; AVX2-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[T7]], i64 1
 ; AVX2-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[T11]], i64 2
@@ -471,53 +456,50 @@ define void @gather_load_4(i32* noalias nocapture %t0, i32* noalias nocapture re
 ; AVX2-NEXT:    [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T27]], i64 6
 ; AVX2-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[T31]], i64 7
 ; AVX2-NEXT:    [[TMP9:%.*]] = add <8 x i32> [[TMP8]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
-; AVX2-NEXT:    [[TMP10:%.*]] = bitcast i32* [[T0:%.*]] to <8 x i32>*
-; AVX2-NEXT:    store <8 x i32> [[TMP9]], <8 x i32>* [[TMP10]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    store <8 x i32> [[TMP9]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512F-LABEL: @gather_load_4(
-; AVX512F-NEXT:    [[TMP1:%.*]] = insertelement <8 x i32*> poison, i32* [[T1:%.*]], i64 0
-; AVX512F-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32*> [[TMP1]], <8 x i32*> poison, <8 x i32> zeroinitializer
-; AVX512F-NEXT:    [[TMP2:%.*]] = getelementptr i32, <8 x i32*> [[SHUFFLE]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
-; AVX512F-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[TMP2]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison), !tbaa [[TBAA0]]
+; AVX512F-NEXT:    [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0
+; AVX512F-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer
+; AVX512F-NEXT:    [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
+; AVX512F-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison), !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[TMP3]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
-; AVX512F-NEXT:    [[TMP5:%.*]] = bitcast i32* [[T0:%.*]] to <8 x i32>*
-; AVX512F-NEXT:    store <8 x i32> [[TMP4]], <8 x i32>* [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT:    store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    ret void
 ;
 ; AVX512VL-LABEL: @gather_load_4(
-; AVX512VL-NEXT:    [[TMP1:%.*]] = insertelement <8 x i32*> poison, i32* [[T1:%.*]], i64 0
-; AVX512VL-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32*> [[TMP1]], <8 x i32*> poison, <8 x i32> zeroinitializer
-; AVX512VL-NEXT:    [[TMP2:%.*]] = getelementptr i32, <8 x i32*> [[SHUFFLE]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
-; AVX512VL-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[TMP2]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison), !tbaa [[TBAA0]]
+; AVX512VL-NEXT:    [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0
+; AVX512VL-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer
+; AVX512VL-NEXT:    [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
+; AVX512VL-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison), !tbaa [[TBAA0]]
 ; AVX512VL-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[TMP3]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
-; AVX512VL-NEXT:    [[TMP5:%.*]] = bitcast i32* [[T0:%.*]] to <8 x i32>*
-; AVX512VL-NEXT:    store <8 x i32> [[TMP4]], <8 x i32>* [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT:    store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX512VL-NEXT:    ret void
 ;
-  %t5 = getelementptr inbounds i32, i32* %t0, i64 1
-  %t6 = getelementptr inbounds i32, i32* %t1, i64 11
-  %t9 = getelementptr inbounds i32, i32* %t0, i64 2
-  %t10 = getelementptr inbounds i32, i32* %t1, i64 4
-  %t13 = getelementptr inbounds i32, i32* %t0, i64 3
-  %t14 = getelementptr inbounds i32, i32* %t1, i64 15
-  %t17 = getelementptr inbounds i32, i32* %t0, i64 4
-  %t18 = getelementptr inbounds i32, i32* %t1, i64 18
-  %t21 = getelementptr inbounds i32, i32* %t0, i64 5
-  %t22 = getelementptr inbounds i32, i32* %t1, i64 9
-  %t25 = getelementptr inbounds i32, i32* %t0, i64 6
-  %t26 = getelementptr inbounds i32, i32* %t1, i64 6
-  %t29 = getelementptr inbounds i32, i32* %t0, i64 7
-  %t30 = getelementptr inbounds i32, i32* %t1, i64 21
+  %t5 = getelementptr inbounds i32, ptr %t0, i64 1
+  %t6 = getelementptr inbounds i32, ptr %t1, i64 11
+  %t9 = getelementptr inbounds i32, ptr %t0, i64 2
+  %t10 = getelementptr inbounds i32, ptr %t1, i64 4
+  %t13 = getelementptr inbounds i32, ptr %t0, i64 3
+  %t14 = getelementptr inbounds i32, ptr %t1, i64 15
+  %t17 = getelementptr inbounds i32, ptr %t0, i64 4
+  %t18 = getelementptr inbounds i32, ptr %t1, i64 18
+  %t21 = getelementptr inbounds i32, ptr %t0, i64 5
+  %t22 = getelementptr inbounds i32, ptr %t1, i64 9
+  %t25 = getelementptr inbounds i32, ptr %t0, i64 6
+  %t26 = getelementptr inbounds i32, ptr %t1, i64 6
+  %t29 = getelementptr inbounds i32, ptr %t0, i64 7
+  %t30 = getelementptr inbounds i32, ptr %t1, i64 21
 
-  %t3 = load i32, i32* %t1, align 4, !tbaa !2
-  %t7 = load i32, i32* %t6, align 4, !tbaa !2
-  %t11 = load i32, i32* %t10, align 4, !tbaa !2
-  %t15 = load i32, i32* %t14, align 4, !tbaa !2
-  %t19 = load i32, i32* %t18, align 4, !tbaa !2
-  %t23 = load i32, i32* %t22, align 4, !tbaa !2
-  %t27 = load i32, i32* %t26, align 4, !tbaa !2
-  %t31 = load i32, i32* %t30, align 4, !tbaa !2
+  %t3 = load i32, ptr %t1, align 4, !tbaa !2
+  %t7 = load i32, ptr %t6, align 4, !tbaa !2
+  %t11 = load i32, ptr %t10, align 4, !tbaa !2
+  %t15 = load i32, ptr %t14, align 4, !tbaa !2
+  %t19 = load i32, ptr %t18, align 4, !tbaa !2
+  %t23 = load i32, ptr %t22, align 4, !tbaa !2
+  %t27 = load i32, ptr %t26, align 4, !tbaa !2
+  %t31 = load i32, ptr %t30, align 4, !tbaa !2
 
   %t4 = add i32 %t3, 1
   %t8 = add i32 %t7, 2
@@ -528,37 +510,37 @@ define void @gather_load_4(i32* noalias nocapture %t0, i32* noalias nocapture re
   %t28 = add i32 %t27, 3
   %t32 = add i32 %t31, 4
 
-  store i32 %t4, i32* %t0, align 4, !tbaa !2
-  store i32 %t8, i32* %t5, align 4, !tbaa !2
-  store i32 %t12, i32* %t9, align 4, !tbaa !2
-  store i32 %t16, i32* %t13, align 4, !tbaa !2
-  store i32 %t20, i32* %t17, align 4, !tbaa !2
-  store i32 %t24, i32* %t21, align 4, !tbaa !2
-  store i32 %t28, i32* %t25, align 4, !tbaa !2
-  store i32 %t32, i32* %t29, align 4, !tbaa !2
+  store i32 %t4, ptr %t0, align 4, !tbaa !2
+  store i32 %t8, ptr %t5, align 4, !tbaa !2
+  store i32 %t12, ptr %t9, align 4, !tbaa !2
+  store i32 %t16, ptr %t13, align 4, !tbaa !2
+  store i32 %t20, ptr %t17, align 4, !tbaa !2
+  store i32 %t24, ptr %t21, align 4, !tbaa !2
+  store i32 %t28, ptr %t25, align 4, !tbaa !2
+  store i32 %t32, ptr %t29, align 4, !tbaa !2
 
   ret void
 }
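
[One FileCheck detail visible throughout the regenerated checks: [[TBAA0:![0-9]+]] appears only on the first match for each run-line prefix and binds the TBAA metadata number there; every later bare [[TBAA0]] must then match the same number. A two-line sketch of the convention (illustrative, not taken from the test):

    ; CHECK: load i32, ptr [[P:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]]
    ; CHECK: store i32 {{.*}}, ptr [[P]], align 4, !tbaa [[TBAA0]]

This is why, after the update script reruns, only the first load per prefix carries the binding pattern while the remaining lines reference the captured variable.]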
 
 
-define void @gather_load_div(float* noalias nocapture %0, float* noalias nocapture readonly %1) {
+define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
 ; SSE-LABEL: @gather_load_div(
-; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP1:%.*]], i64 4
-; SSE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 10
-; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 13
-; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 3
-; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 11
-; SSE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 14
-; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 44
-; SSE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP0:%.*]], i64 4
-; SSE-NEXT:    [[TMP11:%.*]] = load float, float* [[TMP1]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP12:%.*]] = load float, float* [[TMP3]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP13:%.*]] = load float, float* [[TMP4]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP14:%.*]] = load float, float* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP15:%.*]] = load float, float* [[TMP6]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP16:%.*]] = load float, float* [[TMP7]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP17:%.*]] = load float, float* [[TMP8]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP18:%.*]] = load float, float* [[TMP9]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1:%.*]], i64 4
+; SSE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 10
+; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 13
+; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 3
+; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 11
+; SSE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 14
+; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 44
+; SSE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP0:%.*]], i64 4
+; SSE-NEXT:    [[TMP11:%.*]] = load float, ptr [[TMP1]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP12:%.*]] = load float, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP13:%.*]] = load float, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP14:%.*]] = load float, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP15:%.*]] = load float, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP16:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP17:%.*]] = load float, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP18:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP19:%.*]] = insertelement <4 x float> poison, float [[TMP11]], i64 0
 ; SSE-NEXT:    [[TMP20:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP13]], i64 1
 ; SSE-NEXT:    [[TMP21:%.*]] = insertelement <4 x float> [[TMP20]], float [[TMP15]], i64 2
@@ -568,24 +550,23 @@ define void @gather_load_div(float* noalias nocapture %0, float* noalias nocaptu
 ; SSE-NEXT:    [[TMP25:%.*]] = insertelement <4 x float> [[TMP24]], float [[TMP16]], i64 2
 ; SSE-NEXT:    [[TMP26:%.*]] = insertelement <4 x float> [[TMP25]], float [[TMP18]], i64 3
 ; SSE-NEXT:    [[TMP27:%.*]] = fdiv <4 x float> [[TMP22]], [[TMP26]]
-; SSE-NEXT:    [[TMP28:%.*]] = bitcast float* [[TMP0]] to <4 x float>*
-; SSE-NEXT:    store <4 x float> [[TMP27]], <4 x float>* [[TMP28]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP29:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 17
-; SSE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 33
-; SSE-NEXT:    [[TMP31:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 8
-; SSE-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 30
-; SSE-NEXT:    [[TMP33:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 5
-; SSE-NEXT:    [[TMP34:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 27
-; SSE-NEXT:    [[TMP35:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 20
-; SSE-NEXT:    [[TMP36:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 23
-; SSE-NEXT:    [[TMP37:%.*]] = load float, float* [[TMP29]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP38:%.*]] = load float, float* [[TMP30]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP39:%.*]] = load float, float* [[TMP31]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP40:%.*]] = load float, float* [[TMP32]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP41:%.*]] = load float, float* [[TMP33]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP42:%.*]] = load float, float* [[TMP34]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP43:%.*]] = load float, float* [[TMP35]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP44:%.*]] = load float, float* [[TMP36]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store <4 x float> [[TMP27]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17
+; SSE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33
+; SSE-NEXT:    [[TMP31:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8
+; SSE-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30
+; SSE-NEXT:    [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5
+; SSE-NEXT:    [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27
+; SSE-NEXT:    [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20
+; SSE-NEXT:    [[TMP36:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23
+; SSE-NEXT:    [[TMP37:%.*]] = load float, ptr [[TMP29]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP38:%.*]] = load float, ptr [[TMP30]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP39:%.*]] = load float, ptr [[TMP31]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP40:%.*]] = load float, ptr [[TMP32]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP41:%.*]] = load float, ptr [[TMP33]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP42:%.*]] = load float, ptr [[TMP34]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP43:%.*]] = load float, ptr [[TMP35]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP44:%.*]] = load float, ptr [[TMP36]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP45:%.*]] = insertelement <4 x float> poison, float [[TMP37]], i64 0
 ; SSE-NEXT:    [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP39]], i64 1
 ; SSE-NEXT:    [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP41]], i64 2
@@ -595,42 +576,41 @@ define void @gather_load_div(float* noalias nocapture %0, float* noalias nocaptu
 ; SSE-NEXT:    [[TMP51:%.*]] = insertelement <4 x float> [[TMP50]], float [[TMP42]], i64 2
 ; SSE-NEXT:    [[TMP52:%.*]] = insertelement <4 x float> [[TMP51]], float [[TMP44]], i64 3
 ; SSE-NEXT:    [[TMP53:%.*]] = fdiv <4 x float> [[TMP48]], [[TMP52]]
-; SSE-NEXT:    [[TMP54:%.*]] = bitcast float* [[TMP10]] to <4 x float>*
-; SSE-NEXT:    store <4 x float> [[TMP53]], <4 x float>* [[TMP54]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store <4 x float> [[TMP53]], ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @gather_load_div(
-; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP1:%.*]], i64 4
-; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 10
-; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 13
-; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 3
-; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 11
-; AVX-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 14
-; AVX-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 44
-; AVX-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 17
-; AVX-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 33
-; AVX-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 8
-; AVX-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 30
-; AVX-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 5
-; AVX-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 27
-; AVX-NEXT:    [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 20
-; AVX-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 23
-; AVX-NEXT:    [[TMP18:%.*]] = load float, float* [[TMP1]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP19:%.*]] = load float, float* [[TMP3]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP20:%.*]] = load float, float* [[TMP4]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP21:%.*]] = load float, float* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP22:%.*]] = load float, float* [[TMP6]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP23:%.*]] = load float, float* [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP24:%.*]] = load float, float* [[TMP8]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP25:%.*]] = load float, float* [[TMP9]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP26:%.*]] = load float, float* [[TMP10]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP27:%.*]] = load float, float* [[TMP11]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP28:%.*]] = load float, float* [[TMP12]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP29:%.*]] = load float, float* [[TMP13]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP30:%.*]] = load float, float* [[TMP14]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP31:%.*]] = load float, float* [[TMP15]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP32:%.*]] = load float, float* [[TMP16]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP33:%.*]] = load float, float* [[TMP17]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1:%.*]], i64 4
+; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 10
+; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 13
+; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 3
+; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 11
+; AVX-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 14
+; AVX-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 44
+; AVX-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17
+; AVX-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33
+; AVX-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8
+; AVX-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30
+; AVX-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5
+; AVX-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27
+; AVX-NEXT:    [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20
+; AVX-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23
+; AVX-NEXT:    [[TMP18:%.*]] = load float, ptr [[TMP1]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP19:%.*]] = load float, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP20:%.*]] = load float, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP21:%.*]] = load float, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP22:%.*]] = load float, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP23:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP24:%.*]] = load float, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP25:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP26:%.*]] = load float, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP27:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP28:%.*]] = load float, ptr [[TMP12]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP29:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP30:%.*]] = load float, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP31:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP32:%.*]] = load float, ptr [[TMP16]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP33:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[TMP34:%.*]] = insertelement <8 x float> poison, float [[TMP18]], i64 0
 ; AVX-NEXT:    [[TMP35:%.*]] = insertelement <8 x float> [[TMP34]], float [[TMP20]], i64 1
 ; AVX-NEXT:    [[TMP36:%.*]] = insertelement <8 x float> [[TMP35]], float [[TMP22]], i64 2
@@ -648,42 +628,41 @@ define void @gather_load_div(float* noalias nocapture %0, float* noalias nocaptu
 ; AVX-NEXT:    [[TMP48:%.*]] = insertelement <8 x float> [[TMP47]], float [[TMP31]], i64 6
 ; AVX-NEXT:    [[TMP49:%.*]] = insertelement <8 x float> [[TMP48]], float [[TMP33]], i64 7
 ; AVX-NEXT:    [[TMP50:%.*]] = fdiv <8 x float> [[TMP41]], [[TMP49]]
-; AVX-NEXT:    [[TMP51:%.*]] = bitcast float* [[TMP0:%.*]] to <8 x float>*
-; AVX-NEXT:    store <8 x float> [[TMP50]], <8 x float>* [[TMP51]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    store <8 x float> [[TMP50]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    ret void
 ;
 ; AVX2-LABEL: @gather_load_div(
-; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP1:%.*]], i64 4
-; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 10
-; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 13
-; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 3
-; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 11
-; AVX2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 14
-; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 44
-; AVX2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 17
-; AVX2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 33
-; AVX2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 8
-; AVX2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 30
-; AVX2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 5
-; AVX2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 27
-; AVX2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 20
-; AVX2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 23
-; AVX2-NEXT:    [[TMP18:%.*]] = load float, float* [[TMP1]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP19:%.*]] = load float, float* [[TMP3]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP20:%.*]] = load float, float* [[TMP4]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP21:%.*]] = load float, float* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP22:%.*]] = load float, float* [[TMP6]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP23:%.*]] = load float, float* [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP24:%.*]] = load float, float* [[TMP8]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP25:%.*]] = load float, float* [[TMP9]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP26:%.*]] = load float, float* [[TMP10]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP27:%.*]] = load float, float* [[TMP11]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP28:%.*]] = load float, float* [[TMP12]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP29:%.*]] = load float, float* [[TMP13]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP30:%.*]] = load float, float* [[TMP14]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP31:%.*]] = load float, float* [[TMP15]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP32:%.*]] = load float, float* [[TMP16]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP33:%.*]] = load float, float* [[TMP17]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1:%.*]], i64 4
+; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 10
+; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 13
+; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 3
+; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 11
+; AVX2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 14
+; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 44
+; AVX2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17
+; AVX2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33
+; AVX2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8
+; AVX2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30
+; AVX2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5
+; AVX2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27
+; AVX2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20
+; AVX2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23
+; AVX2-NEXT:    [[TMP18:%.*]] = load float, ptr [[TMP1]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP19:%.*]] = load float, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP20:%.*]] = load float, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP21:%.*]] = load float, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP22:%.*]] = load float, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP23:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP24:%.*]] = load float, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP25:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP26:%.*]] = load float, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP27:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP28:%.*]] = load float, ptr [[TMP12]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP29:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP30:%.*]] = load float, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP31:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP32:%.*]] = load float, ptr [[TMP16]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP33:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[TMP34:%.*]] = insertelement <8 x float> poison, float [[TMP18]], i64 0
 ; AVX2-NEXT:    [[TMP35:%.*]] = insertelement <8 x float> [[TMP34]], float [[TMP20]], i64 1
 ; AVX2-NEXT:    [[TMP36:%.*]] = insertelement <8 x float> [[TMP35]], float [[TMP22]], i64 2
@@ -701,88 +680,85 @@ define void @gather_load_div(float* noalias nocapture %0, float* noalias nocaptu
 ; AVX2-NEXT:    [[TMP48:%.*]] = insertelement <8 x float> [[TMP47]], float [[TMP31]], i64 6
 ; AVX2-NEXT:    [[TMP49:%.*]] = insertelement <8 x float> [[TMP48]], float [[TMP33]], i64 7
 ; AVX2-NEXT:    [[TMP50:%.*]] = fdiv <8 x float> [[TMP41]], [[TMP49]]
-; AVX2-NEXT:    [[TMP51:%.*]] = bitcast float* [[TMP0:%.*]] to <8 x float>*
-; AVX2-NEXT:    store <8 x float> [[TMP50]], <8 x float>* [[TMP51]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    store <8 x float> [[TMP50]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512F-LABEL: @gather_load_div(
-; AVX512F-NEXT:    [[TMP3:%.*]] = insertelement <8 x float*> poison, float* [[TMP1:%.*]], i64 0
-; AVX512F-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <8 x float*> [[TMP3]], <8 x float*> poison, <8 x i32> zeroinitializer
-; AVX512F-NEXT:    [[TMP4:%.*]] = getelementptr float, <8 x float*> [[SHUFFLE1]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
-; AVX512F-NEXT:    [[TMP5:%.*]] = getelementptr float, <8 x float*> [[SHUFFLE1]], <8 x i64> <i64 0, i64 10, i64 3, i64 14, i64 17, i64 8, i64 5, i64 20>
-; AVX512F-NEXT:    [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP5]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison), !tbaa [[TBAA0]]
-; AVX512F-NEXT:    [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP4]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison), !tbaa [[TBAA0]]
+; AVX512F-NEXT:    [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0
+; AVX512F-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer
+; AVX512F-NEXT:    [[TMP4:%.*]] = getelementptr float, <8 x ptr> [[SHUFFLE1]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
+; AVX512F-NEXT:    [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[SHUFFLE1]], <8 x i64> <i64 0, i64 10, i64 3, i64 14, i64 17, i64 8, i64 5, i64 20>
+; AVX512F-NEXT:    [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison), !tbaa [[TBAA0]]
+; AVX512F-NEXT:    [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison), !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    [[TMP8:%.*]] = fdiv <8 x float> [[TMP6]], [[TMP7]]
-; AVX512F-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP0:%.*]] to <8 x float>*
-; AVX512F-NEXT:    store <8 x float> [[TMP8]], <8 x float>* [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT:    store <8 x float> [[TMP8]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    ret void
 ;
 ; AVX512VL-LABEL: @gather_load_div(
-; AVX512VL-NEXT:    [[TMP3:%.*]] = insertelement <8 x float*> poison, float* [[TMP1:%.*]], i64 0
-; AVX512VL-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <8 x float*> [[TMP3]], <8 x float*> poison, <8 x i32> zeroinitializer
-; AVX512VL-NEXT:    [[TMP4:%.*]] = getelementptr float, <8 x float*> [[SHUFFLE1]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
-; AVX512VL-NEXT:    [[TMP5:%.*]] = getelementptr float, <8 x float*> [[SHUFFLE1]], <8 x i64> <i64 0, i64 10, i64 3, i64 14, i64 17, i64 8, i64 5, i64 20>
-; AVX512VL-NEXT:    [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP5]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison), !tbaa [[TBAA0]]
-; AVX512VL-NEXT:    [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP4]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison), !tbaa [[TBAA0]]
+; AVX512VL-NEXT:    [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0
+; AVX512VL-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer
+; AVX512VL-NEXT:    [[TMP4:%.*]] = getelementptr float, <8 x ptr> [[SHUFFLE1]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
+; AVX512VL-NEXT:    [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[SHUFFLE1]], <8 x i64> <i64 0, i64 10, i64 3, i64 14, i64 17, i64 8, i64 5, i64 20>
+; AVX512VL-NEXT:    [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison), !tbaa [[TBAA0]]
+; AVX512VL-NEXT:    [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison), !tbaa [[TBAA0]]
 ; AVX512VL-NEXT:    [[TMP8:%.*]] = fdiv <8 x float> [[TMP6]], [[TMP7]]
-; AVX512VL-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP0:%.*]] to <8 x float>*
-; AVX512VL-NEXT:    store <8 x float> [[TMP8]], <8 x float>* [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT:    store <8 x float> [[TMP8]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX512VL-NEXT:    ret void
 ;
-  %3 = load float, float* %1, align 4, !tbaa !2
-  %4 = getelementptr inbounds float, float* %1, i64 4
-  %5 = load float, float* %4, align 4, !tbaa !2
+  %3 = load float, ptr %1, align 4, !tbaa !2
+  %4 = getelementptr inbounds float, ptr %1, i64 4
+  %5 = load float, ptr %4, align 4, !tbaa !2
   %6 = fdiv float %3, %5
-  %7 = getelementptr inbounds float, float* %0, i64 1
-  store float %6, float* %0, align 4, !tbaa !2
-  %8 = getelementptr inbounds float, float* %1, i64 10
-  %9 = load float, float* %8, align 4, !tbaa !2
-  %10 = getelementptr inbounds float, float* %1, i64 13
-  %11 = load float, float* %10, align 4, !tbaa !2
+  %7 = getelementptr inbounds float, ptr %0, i64 1
+  store float %6, ptr %0, align 4, !tbaa !2
+  %8 = getelementptr inbounds float, ptr %1, i64 10
+  %9 = load float, ptr %8, align 4, !tbaa !2
+  %10 = getelementptr inbounds float, ptr %1, i64 13
+  %11 = load float, ptr %10, align 4, !tbaa !2
   %12 = fdiv float %9, %11
-  %13 = getelementptr inbounds float, float* %0, i64 2
-  store float %12, float* %7, align 4, !tbaa !2
-  %14 = getelementptr inbounds float, float* %1, i64 3
-  %15 = load float, float* %14, align 4, !tbaa !2
-  %16 = getelementptr inbounds float, float* %1, i64 11
-  %17 = load float, float* %16, align 4, !tbaa !2
+  %13 = getelementptr inbounds float, ptr %0, i64 2
+  store float %12, ptr %7, align 4, !tbaa !2
+  %14 = getelementptr inbounds float, ptr %1, i64 3
+  %15 = load float, ptr %14, align 4, !tbaa !2
+  %16 = getelementptr inbounds float, ptr %1, i64 11
+  %17 = load float, ptr %16, align 4, !tbaa !2
   %18 = fdiv float %15, %17
-  %19 = getelementptr inbounds float, float* %0, i64 3
-  store float %18, float* %13, align 4, !tbaa !2
-  %20 = getelementptr inbounds float, float* %1, i64 14
-  %21 = load float, float* %20, align 4, !tbaa !2
-  %22 = getelementptr inbounds float, float* %1, i64 44
-  %23 = load float, float* %22, align 4, !tbaa !2
+  %19 = getelementptr inbounds float, ptr %0, i64 3
+  store float %18, ptr %13, align 4, !tbaa !2
+  %20 = getelementptr inbounds float, ptr %1, i64 14
+  %21 = load float, ptr %20, align 4, !tbaa !2
+  %22 = getelementptr inbounds float, ptr %1, i64 44
+  %23 = load float, ptr %22, align 4, !tbaa !2
   %24 = fdiv float %21, %23
-  %25 = getelementptr inbounds float, float* %0, i64 4
-  store float %24, float* %19, align 4, !tbaa !2
-  %26 = getelementptr inbounds float, float* %1, i64 17
-  %27 = load float, float* %26, align 4, !tbaa !2
-  %28 = getelementptr inbounds float, float* %1, i64 33
-  %29 = load float, float* %28, align 4, !tbaa !2
+  %25 = getelementptr inbounds float, ptr %0, i64 4
+  store float %24, ptr %19, align 4, !tbaa !2
+  %26 = getelementptr inbounds float, ptr %1, i64 17
+  %27 = load float, ptr %26, align 4, !tbaa !2
+  %28 = getelementptr inbounds float, ptr %1, i64 33
+  %29 = load float, ptr %28, align 4, !tbaa !2
   %30 = fdiv float %27, %29
-  %31 = getelementptr inbounds float, float* %0, i64 5
-  store float %30, float* %25, align 4, !tbaa !2
-  %32 = getelementptr inbounds float, float* %1, i64 8
-  %33 = load float, float* %32, align 4, !tbaa !2
-  %34 = getelementptr inbounds float, float* %1, i64 30
-  %35 = load float, float* %34, align 4, !tbaa !2
+  %31 = getelementptr inbounds float, ptr %0, i64 5
+  store float %30, ptr %25, align 4, !tbaa !2
+  %32 = getelementptr inbounds float, ptr %1, i64 8
+  %33 = load float, ptr %32, align 4, !tbaa !2
+  %34 = getelementptr inbounds float, ptr %1, i64 30
+  %35 = load float, ptr %34, align 4, !tbaa !2
   %36 = fdiv float %33, %35
-  %37 = getelementptr inbounds float, float* %0, i64 6
-  store float %36, float* %31, align 4, !tbaa !2
-  %38 = getelementptr inbounds float, float* %1, i64 5
-  %39 = load float, float* %38, align 4, !tbaa !2
-  %40 = getelementptr inbounds float, float* %1, i64 27
-  %41 = load float, float* %40, align 4, !tbaa !2
+  %37 = getelementptr inbounds float, ptr %0, i64 6
+  store float %36, ptr %31, align 4, !tbaa !2
+  %38 = getelementptr inbounds float, ptr %1, i64 5
+  %39 = load float, ptr %38, align 4, !tbaa !2
+  %40 = getelementptr inbounds float, ptr %1, i64 27
+  %41 = load float, ptr %40, align 4, !tbaa !2
   %42 = fdiv float %39, %41
-  %43 = getelementptr inbounds float, float* %0, i64 7
-  store float %42, float* %37, align 4, !tbaa !2
-  %44 = getelementptr inbounds float, float* %1, i64 20
-  %45 = load float, float* %44, align 4, !tbaa !2
-  %46 = getelementptr inbounds float, float* %1, i64 23
-  %47 = load float, float* %46, align 4, !tbaa !2
+  %43 = getelementptr inbounds float, ptr %0, i64 7
+  store float %42, ptr %37, align 4, !tbaa !2
+  %44 = getelementptr inbounds float, ptr %1, i64 20
+  %45 = load float, ptr %44, align 4, !tbaa !2
+  %46 = getelementptr inbounds float, ptr %1, i64 23
+  %47 = load float, ptr %46, align 4, !tbaa !2
   %48 = fdiv float %45, %47
-  store float %48, float* %43, align 4, !tbaa !2
+  store float %48, ptr %43, align 4, !tbaa !2
   ret void
 }
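
For anyone auditing these conversions, the rewrite is mechanical and behavior-preserving: every typed pointer (i32*, float*, <8 x float>*) collapses to the single opaque type ptr, bitcasts that only reinterpret a pointee type become no-ops and are deleted, and overloaded intrinsic manglings drop the pointee suffix (e.g. @llvm.masked.gather.v8f32.v8p0f32 becomes @llvm.masked.gather.v8f32.v8p0, as in the AVX512 check lines above). A minimal sketch of the pattern on a hypothetical function @f, not taken from any test in this patch:

; Typed pointers (old style): a bitcast is needed before the wide store.
define void @f_typed(float* %dst, <4 x float> %v) {
  %cast = bitcast float* %dst to <4 x float>*
  store <4 x float> %v, <4 x float>* %cast, align 4
  ret void
}

; Opaque pointers: there is only one pointer type, so the store uses
; %dst directly and the bitcast disappears.
define void @f_opaque(ptr %dst, <4 x float> %v) {
  store <4 x float> %v, ptr %dst, align 4
  ret void
}

Because loads, stores, and getelementptrs already carry their own value types, the FileCheck lines change only in how the pointer operand is spelled; offsets, alignment, and !tbaa metadata are untouched.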
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll
index 89485bc22322c..cdf69f79a4998 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll
@@ -5,273 +5,263 @@
 ; RUN:  opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx512f  | FileCheck %s --check-prefixes=AVX512F
 ; RUN:  opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512VL
 
-define void @gather_load(i32* noalias nocapture %0, i32* noalias nocapture readonly %1) {
+define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
 ; SSE-LABEL: @gather_load(
-; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 1
-; SSE-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 11
-; SSE-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4
-; SSE-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
+; SSE-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11
+; SSE-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
+; SSE-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i64 0
 ; SSE-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1
 ; SSE-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2
 ; SSE-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3
 ; SSE-NEXT:    [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
-; SSE-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>*
-; SSE-NEXT:    store <4 x i32> [[TMP14]], <4 x i32>* [[TMP15]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @gather_load(
-; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 1
-; AVX-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 11
-; AVX-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4
-; AVX-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
+; AVX-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11
+; AVX-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
+; AVX-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i64 0
 ; AVX-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1
 ; AVX-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2
 ; AVX-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3
 ; AVX-NEXT:    [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
-; AVX-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>*
-; AVX-NEXT:    store <4 x i32> [[TMP14]], <4 x i32>* [[TMP15]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    ret void
 ;
 ; AVX2-LABEL: @gather_load(
-; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 1
-; AVX2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 11
-; AVX2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4
-; AVX2-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
+; AVX2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11
+; AVX2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
+; AVX2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i64 0
 ; AVX2-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1
 ; AVX2-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2
 ; AVX2-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3
 ; AVX2-NEXT:    [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
-; AVX2-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>*
-; AVX2-NEXT:    store <4 x i32> [[TMP14]], <4 x i32>* [[TMP15]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512F-LABEL: @gather_load(
-; AVX512F-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 1
-; AVX512F-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; AVX512F-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 11
-; AVX512F-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX512F-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4
-; AVX512F-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX512F-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
+; AVX512F-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; AVX512F-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11
+; AVX512F-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
+; AVX512F-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i64 0
 ; AVX512F-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1
 ; AVX512F-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2
 ; AVX512F-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3
 ; AVX512F-NEXT:    [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4>
-; AVX512F-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>*
-; AVX512F-NEXT:    store <4 x i32> [[TMP14]], <4 x i32>* [[TMP15]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT:    store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    ret void
 ;
 ; AVX512VL-LABEL: @gather_load(
-; AVX512VL-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32*> poison, i32* [[TMP1:%.*]], i64 0
-; AVX512VL-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32*> [[TMP3]], <4 x i32*> poison, <4 x i32> zeroinitializer
-; AVX512VL-NEXT:    [[TMP4:%.*]] = getelementptr i32, <4 x i32*> [[SHUFFLE]], <4 x i64> <i64 0, i64 11, i64 4, i64 1>
-; AVX512VL-NEXT:    [[TMP5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP4]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison), !tbaa [[TBAA0:![0-9]+]]
+; AVX512VL-NEXT:    [[TMP3:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1:%.*]], i64 0
+; AVX512VL-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer
+; AVX512VL-NEXT:    [[TMP4:%.*]] = getelementptr i32, <4 x ptr> [[SHUFFLE]], <4 x i64> <i64 0, i64 11, i64 4, i64 1>
+; AVX512VL-NEXT:    [[TMP5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP4]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison), !tbaa [[TBAA0:![0-9]+]]
 ; AVX512VL-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], <i32 1, i32 2, i32 3, i32 4>
-; AVX512VL-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>*
-; AVX512VL-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX512VL-NEXT:    ret void
 ;
-  %3 = getelementptr inbounds i32, i32* %1, i64 1
-  %4 = load i32, i32* %1, align 4, !tbaa !2
-  %5 = getelementptr inbounds i32, i32* %0, i64 1
-  %6 = getelementptr inbounds i32, i32* %1, i64 11
-  %7 = load i32, i32* %6, align 4, !tbaa !2
-  %8 = getelementptr inbounds i32, i32* %0, i64 2
-  %9 = getelementptr inbounds i32, i32* %1, i64 4
-  %10 = load i32, i32* %9, align 4, !tbaa !2
-  %11 = getelementptr inbounds i32, i32* %0, i64 3
-  %12 = load i32, i32* %3, align 4, !tbaa !2
+  %3 = getelementptr inbounds i32, ptr %1, i64 1
+  %4 = load i32, ptr %1, align 4, !tbaa !2
+  %5 = getelementptr inbounds i32, ptr %0, i64 1
+  %6 = getelementptr inbounds i32, ptr %1, i64 11
+  %7 = load i32, ptr %6, align 4, !tbaa !2
+  %8 = getelementptr inbounds i32, ptr %0, i64 2
+  %9 = getelementptr inbounds i32, ptr %1, i64 4
+  %10 = load i32, ptr %9, align 4, !tbaa !2
+  %11 = getelementptr inbounds i32, ptr %0, i64 3
+  %12 = load i32, ptr %3, align 4, !tbaa !2
   %13 = insertelement <4 x i32> undef, i32 %4, i32 0
   %14 = insertelement <4 x i32> %13, i32 %7, i32 1
   %15 = insertelement <4 x i32> %14, i32 %10, i32 2
   %16 = insertelement <4 x i32> %15, i32 %12, i32 3
   %17 = add nsw <4 x i32> %16, <i32 1, i32 2, i32 3, i32 4>
-  %18 = bitcast i32* %0 to <4 x i32>*
-  store <4 x i32> %17, <4 x i32>* %18, align 4, !tbaa !2
+  store <4 x i32> %17, ptr %0, align 4, !tbaa !2
   ret void
 }
 
-define void @gather_load_2(i32* noalias nocapture %0, i32* noalias nocapture readonly %1) {
+define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
 ; SSE-LABEL: @gather_load_2(
-; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 1
-; SSE-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
+; SSE-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP5:%.*]] = add nsw i32 [[TMP4]], 1
-; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 1
-; SSE-NEXT:    store i32 [[TMP5]], i32* [[TMP0]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 10
-; SSE-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1
+; SSE-NEXT:    store i32 [[TMP5]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 10
+; SSE-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP9:%.*]] = add nsw i32 [[TMP8]], 2
-; SSE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 2
-; SSE-NEXT:    store i32 [[TMP9]], i32* [[TMP6]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 3
-; SSE-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2
+; SSE-NEXT:    store i32 [[TMP9]], ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 3
+; SSE-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP13:%.*]] = add nsw i32 [[TMP12]], 3
-; SSE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 3
-; SSE-NEXT:    store i32 [[TMP13]], i32* [[TMP10]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 5
-; SSE-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3
+; SSE-NEXT:    store i32 [[TMP13]], ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 5
+; SSE-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP17:%.*]] = add nsw i32 [[TMP16]], 4
-; SSE-NEXT:    store i32 [[TMP17]], i32* [[TMP14]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store i32 [[TMP17]], ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @gather_load_2(
-; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 1
-; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 10
-; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 3
-; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 5
-; AVX-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
+; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 10
+; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 3
+; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 5
+; AVX-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0
 ; AVX-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 1
 ; AVX-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 2
 ; AVX-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3
 ; AVX-NEXT:    [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 1, i32 2, i32 3, i32 4>
-; AVX-NEXT:    [[TMP16:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>*
-; AVX-NEXT:    store <4 x i32> [[TMP15]], <4 x i32>* [[TMP16]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    ret void
 ;
 ; AVX2-LABEL: @gather_load_2(
-; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 1
-; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 10
-; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 3
-; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 5
-; AVX2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
+; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 10
+; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 3
+; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 5
+; AVX2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0
 ; AVX2-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 1
 ; AVX2-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 2
 ; AVX2-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3
 ; AVX2-NEXT:    [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 1, i32 2, i32 3, i32 4>
-; AVX2-NEXT:    [[TMP16:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>*
-; AVX2-NEXT:    store <4 x i32> [[TMP15]], <4 x i32>* [[TMP16]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512F-LABEL: @gather_load_2(
-; AVX512F-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 1
-; AVX512F-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 10
-; AVX512F-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 3
-; AVX512F-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 5
-; AVX512F-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]]
-; AVX512F-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4, !tbaa [[TBAA0]]
-; AVX512F-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX512F-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 1
+; AVX512F-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 10
+; AVX512F-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 3
+; AVX512F-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 5
+; AVX512F-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0
 ; AVX512F-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 1
 ; AVX512F-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 2
 ; AVX512F-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3
 ; AVX512F-NEXT:    [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 1, i32 2, i32 3, i32 4>
-; AVX512F-NEXT:    [[TMP16:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>*
-; AVX512F-NEXT:    store <4 x i32> [[TMP15]], <4 x i32>* [[TMP16]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT:    store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    ret void
 ;
 ; AVX512VL-LABEL: @gather_load_2(
-; AVX512VL-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32*> poison, i32* [[TMP1:%.*]], i64 0
-; AVX512VL-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32*> [[TMP3]], <4 x i32*> poison, <4 x i32> zeroinitializer
-; AVX512VL-NEXT:    [[TMP4:%.*]] = getelementptr i32, <4 x i32*> [[SHUFFLE]], <4 x i64> <i64 1, i64 10, i64 3, i64 5>
-; AVX512VL-NEXT:    [[TMP5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP4]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison), !tbaa [[TBAA0]]
+; AVX512VL-NEXT:    [[TMP3:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1:%.*]], i64 0
+; AVX512VL-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer
+; AVX512VL-NEXT:    [[TMP4:%.*]] = getelementptr i32, <4 x ptr> [[SHUFFLE]], <4 x i64> <i64 1, i64 10, i64 3, i64 5>
+; AVX512VL-NEXT:    [[TMP5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP4]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison), !tbaa [[TBAA0]]
 ; AVX512VL-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], <i32 1, i32 2, i32 3, i32 4>
-; AVX512VL-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP0:%.*]] to <4 x i32>*
-; AVX512VL-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX512VL-NEXT:    ret void
 ;
-  %3 = getelementptr inbounds i32, i32* %1, i64 1
-  %4 = load i32, i32* %3, align 4, !tbaa !2
+  %3 = getelementptr inbounds i32, ptr %1, i64 1
+  %4 = load i32, ptr %3, align 4, !tbaa !2
   %5 = add nsw i32 %4, 1
-  %6 = getelementptr inbounds i32, i32* %0, i64 1
-  store i32 %5, i32* %0, align 4, !tbaa !2
-  %7 = getelementptr inbounds i32, i32* %1, i64 10
-  %8 = load i32, i32* %7, align 4, !tbaa !2
+  %6 = getelementptr inbounds i32, ptr %0, i64 1
+  store i32 %5, ptr %0, align 4, !tbaa !2
+  %7 = getelementptr inbounds i32, ptr %1, i64 10
+  %8 = load i32, ptr %7, align 4, !tbaa !2
   %9 = add nsw i32 %8, 2
-  %10 = getelementptr inbounds i32, i32* %0, i64 2
-  store i32 %9, i32* %6, align 4, !tbaa !2
-  %11 = getelementptr inbounds i32, i32* %1, i64 3
-  %12 = load i32, i32* %11, align 4, !tbaa !2
+  %10 = getelementptr inbounds i32, ptr %0, i64 2
+  store i32 %9, ptr %6, align 4, !tbaa !2
+  %11 = getelementptr inbounds i32, ptr %1, i64 3
+  %12 = load i32, ptr %11, align 4, !tbaa !2
   %13 = add nsw i32 %12, 3
-  %14 = getelementptr inbounds i32, i32* %0, i64 3
-  store i32 %13, i32* %10, align 4, !tbaa !2
-  %15 = getelementptr inbounds i32, i32* %1, i64 5
-  %16 = load i32, i32* %15, align 4, !tbaa !2
+  %14 = getelementptr inbounds i32, ptr %0, i64 3
+  store i32 %13, ptr %10, align 4, !tbaa !2
+  %15 = getelementptr inbounds i32, ptr %1, i64 5
+  %16 = load i32, ptr %15, align 4, !tbaa !2
   %17 = add nsw i32 %16, 4
-  store i32 %17, i32* %14, align 4, !tbaa !2
+  store i32 %17, ptr %14, align 4, !tbaa !2
   ret void
 }
 
 
-define void @gather_load_3(i32* noalias nocapture %0, i32* noalias nocapture readonly %1) {
+define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
 ; SSE-LABEL: @gather_load_3(
-; SSE-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP4:%.*]] = add i32 [[TMP3]], 1
-; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 1
-; SSE-NEXT:    store i32 [[TMP4]], i32* [[TMP0]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 11
-; SSE-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 1
+; SSE-NEXT:    store i32 [[TMP4]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 11
+; SSE-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP8:%.*]] = add i32 [[TMP7]], 2
-; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 2
-; SSE-NEXT:    store i32 [[TMP8]], i32* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4
-; SSE-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2
+; SSE-NEXT:    store i32 [[TMP8]], ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
+; SSE-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP12:%.*]] = add i32 [[TMP11]], 3
-; SSE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 3
-; SSE-NEXT:    store i32 [[TMP12]], i32* [[TMP9]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 15
-; SSE-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 3
+; SSE-NEXT:    store i32 [[TMP12]], ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 15
+; SSE-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP16:%.*]] = add i32 [[TMP15]], 4
-; SSE-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 4
-; SSE-NEXT:    store i32 [[TMP16]], i32* [[TMP13]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 18
-; SSE-NEXT:    [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
+; SSE-NEXT:    store i32 [[TMP16]], ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 18
+; SSE-NEXT:    [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP20:%.*]] = add i32 [[TMP19]], 1
-; SSE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 5
-; SSE-NEXT:    store i32 [[TMP20]], i32* [[TMP17]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 9
-; SSE-NEXT:    [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 5
+; SSE-NEXT:    store i32 [[TMP20]], ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 9
+; SSE-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP24:%.*]] = add i32 [[TMP23]], 2
-; SSE-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 6
-; SSE-NEXT:    store i32 [[TMP24]], i32* [[TMP21]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 6
-; SSE-NEXT:    [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 6
+; SSE-NEXT:    store i32 [[TMP24]], ptr [[TMP21]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 6
+; SSE-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP28:%.*]] = add i32 [[TMP27]], 3
-; SSE-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 7
-; SSE-NEXT:    store i32 [[TMP28]], i32* [[TMP25]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 21
-; SSE-NEXT:    [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 7
+; SSE-NEXT:    store i32 [[TMP28]], ptr [[TMP25]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 21
+; SSE-NEXT:    [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP32:%.*]] = add i32 [[TMP31]], 4
-; SSE-NEXT:    store i32 [[TMP32]], i32* [[TMP29]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store i32 [[TMP32]], ptr [[TMP29]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @gather_load_3(
-; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 11
-; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4
-; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 15
-; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 18
-; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 9
-; AVX-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 6
-; AVX-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 21
-; AVX-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP8]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP17:%.*]] = load i32, i32* [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 11
+; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
+; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 15
+; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 18
+; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 9
+; AVX-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 6
+; AVX-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 21
+; AVX-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[TMP18:%.*]] = insertelement <8 x i32> poison, i32 [[TMP10]], i64 0
 ; AVX-NEXT:    [[TMP19:%.*]] = insertelement <8 x i32> [[TMP18]], i32 [[TMP11]], i64 1
 ; AVX-NEXT:    [[TMP20:%.*]] = insertelement <8 x i32> [[TMP19]], i32 [[TMP12]], i64 2
@@ -281,26 +271,25 @@ define void @gather_load_3(i32* noalias nocapture %0, i32* noalias nocapture rea
 ; AVX-NEXT:    [[TMP24:%.*]] = insertelement <8 x i32> [[TMP23]], i32 [[TMP16]], i64 6
 ; AVX-NEXT:    [[TMP25:%.*]] = insertelement <8 x i32> [[TMP24]], i32 [[TMP17]], i64 7
 ; AVX-NEXT:    [[TMP26:%.*]] = add <8 x i32> [[TMP25]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
-; AVX-NEXT:    [[TMP27:%.*]] = bitcast i32* [[TMP0:%.*]] to <8 x i32>*
-; AVX-NEXT:    store <8 x i32> [[TMP26]], <8 x i32>* [[TMP27]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    store <8 x i32> [[TMP26]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    ret void
 ;
 ; AVX2-LABEL: @gather_load_3(
-; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1:%.*]], i64 11
-; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4
-; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 15
-; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 18
-; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 9
-; AVX2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 6
-; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 21
-; AVX2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP3]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP8]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP17:%.*]] = load i32, i32* [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 11
+; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
+; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 15
+; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 18
+; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 9
+; AVX2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 6
+; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 21
+; AVX2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[TMP18:%.*]] = insertelement <8 x i32> poison, i32 [[TMP10]], i64 0
 ; AVX2-NEXT:    [[TMP19:%.*]] = insertelement <8 x i32> [[TMP18]], i32 [[TMP11]], i64 1
 ; AVX2-NEXT:    [[TMP20:%.*]] = insertelement <8 x i32> [[TMP19]], i32 [[TMP12]], i64 2
@@ -310,95 +299,92 @@ define void @gather_load_3(i32* noalias nocapture %0, i32* noalias nocapture rea
 ; AVX2-NEXT:    [[TMP24:%.*]] = insertelement <8 x i32> [[TMP23]], i32 [[TMP16]], i64 6
 ; AVX2-NEXT:    [[TMP25:%.*]] = insertelement <8 x i32> [[TMP24]], i32 [[TMP17]], i64 7
 ; AVX2-NEXT:    [[TMP26:%.*]] = add <8 x i32> [[TMP25]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
-; AVX2-NEXT:    [[TMP27:%.*]] = bitcast i32* [[TMP0:%.*]] to <8 x i32>*
-; AVX2-NEXT:    store <8 x i32> [[TMP26]], <8 x i32>* [[TMP27]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    store <8 x i32> [[TMP26]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512F-LABEL: @gather_load_3(
-; AVX512F-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32*> poison, i32* [[TMP1:%.*]], i64 0
-; AVX512F-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32*> [[TMP3]], <8 x i32*> poison, <8 x i32> zeroinitializer
-; AVX512F-NEXT:    [[TMP4:%.*]] = getelementptr i32, <8 x i32*> [[SHUFFLE]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
-; AVX512F-NEXT:    [[TMP5:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[TMP4]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison), !tbaa [[TBAA0]]
+; AVX512F-NEXT:    [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0
+; AVX512F-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer
+; AVX512F-NEXT:    [[TMP4:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
+; AVX512F-NEXT:    [[TMP5:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison), !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    [[TMP6:%.*]] = add <8 x i32> [[TMP5]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
-; AVX512F-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP0:%.*]] to <8 x i32>*
-; AVX512F-NEXT:    store <8 x i32> [[TMP6]], <8 x i32>* [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT:    store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    ret void
 ;
 ; AVX512VL-LABEL: @gather_load_3(
-; AVX512VL-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32*> poison, i32* [[TMP1:%.*]], i64 0
-; AVX512VL-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32*> [[TMP3]], <8 x i32*> poison, <8 x i32> zeroinitializer
-; AVX512VL-NEXT:    [[TMP4:%.*]] = getelementptr i32, <8 x i32*> [[SHUFFLE]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
-; AVX512VL-NEXT:    [[TMP5:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[TMP4]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison), !tbaa [[TBAA0]]
+; AVX512VL-NEXT:    [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0
+; AVX512VL-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer
+; AVX512VL-NEXT:    [[TMP4:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
+; AVX512VL-NEXT:    [[TMP5:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison), !tbaa [[TBAA0]]
 ; AVX512VL-NEXT:    [[TMP6:%.*]] = add <8 x i32> [[TMP5]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
-; AVX512VL-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP0:%.*]] to <8 x i32>*
-; AVX512VL-NEXT:    store <8 x i32> [[TMP6]], <8 x i32>* [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT:    store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX512VL-NEXT:    ret void
 ;
-  %3 = load i32, i32* %1, align 4, !tbaa !2
+  %3 = load i32, ptr %1, align 4, !tbaa !2
   %4 = add i32 %3, 1
-  %5 = getelementptr inbounds i32, i32* %0, i64 1
-  store i32 %4, i32* %0, align 4, !tbaa !2
-  %6 = getelementptr inbounds i32, i32* %1, i64 11
-  %7 = load i32, i32* %6, align 4, !tbaa !2
+  %5 = getelementptr inbounds i32, ptr %0, i64 1
+  store i32 %4, ptr %0, align 4, !tbaa !2
+  %6 = getelementptr inbounds i32, ptr %1, i64 11
+  %7 = load i32, ptr %6, align 4, !tbaa !2
   %8 = add i32 %7, 2
-  %9 = getelementptr inbounds i32, i32* %0, i64 2
-  store i32 %8, i32* %5, align 4, !tbaa !2
-  %10 = getelementptr inbounds i32, i32* %1, i64 4
-  %11 = load i32, i32* %10, align 4, !tbaa !2
+  %9 = getelementptr inbounds i32, ptr %0, i64 2
+  store i32 %8, ptr %5, align 4, !tbaa !2
+  %10 = getelementptr inbounds i32, ptr %1, i64 4
+  %11 = load i32, ptr %10, align 4, !tbaa !2
   %12 = add i32 %11, 3
-  %13 = getelementptr inbounds i32, i32* %0, i64 3
-  store i32 %12, i32* %9, align 4, !tbaa !2
-  %14 = getelementptr inbounds i32, i32* %1, i64 15
-  %15 = load i32, i32* %14, align 4, !tbaa !2
+  %13 = getelementptr inbounds i32, ptr %0, i64 3
+  store i32 %12, ptr %9, align 4, !tbaa !2
+  %14 = getelementptr inbounds i32, ptr %1, i64 15
+  %15 = load i32, ptr %14, align 4, !tbaa !2
   %16 = add i32 %15, 4
-  %17 = getelementptr inbounds i32, i32* %0, i64 4
-  store i32 %16, i32* %13, align 4, !tbaa !2
-  %18 = getelementptr inbounds i32, i32* %1, i64 18
-  %19 = load i32, i32* %18, align 4, !tbaa !2
+  %17 = getelementptr inbounds i32, ptr %0, i64 4
+  store i32 %16, ptr %13, align 4, !tbaa !2
+  %18 = getelementptr inbounds i32, ptr %1, i64 18
+  %19 = load i32, ptr %18, align 4, !tbaa !2
   %20 = add i32 %19, 1
-  %21 = getelementptr inbounds i32, i32* %0, i64 5
-  store i32 %20, i32* %17, align 4, !tbaa !2
-  %22 = getelementptr inbounds i32, i32* %1, i64 9
-  %23 = load i32, i32* %22, align 4, !tbaa !2
+  %21 = getelementptr inbounds i32, ptr %0, i64 5
+  store i32 %20, ptr %17, align 4, !tbaa !2
+  %22 = getelementptr inbounds i32, ptr %1, i64 9
+  %23 = load i32, ptr %22, align 4, !tbaa !2
   %24 = add i32 %23, 2
-  %25 = getelementptr inbounds i32, i32* %0, i64 6
-  store i32 %24, i32* %21, align 4, !tbaa !2
-  %26 = getelementptr inbounds i32, i32* %1, i64 6
-  %27 = load i32, i32* %26, align 4, !tbaa !2
+  %25 = getelementptr inbounds i32, ptr %0, i64 6
+  store i32 %24, ptr %21, align 4, !tbaa !2
+  %26 = getelementptr inbounds i32, ptr %1, i64 6
+  %27 = load i32, ptr %26, align 4, !tbaa !2
   %28 = add i32 %27, 3
-  %29 = getelementptr inbounds i32, i32* %0, i64 7
-  store i32 %28, i32* %25, align 4, !tbaa !2
-  %30 = getelementptr inbounds i32, i32* %1, i64 21
-  %31 = load i32, i32* %30, align 4, !tbaa !2
+  %29 = getelementptr inbounds i32, ptr %0, i64 7
+  store i32 %28, ptr %25, align 4, !tbaa !2
+  %30 = getelementptr inbounds i32, ptr %1, i64 21
+  %31 = load i32, ptr %30, align 4, !tbaa !2
   %32 = add i32 %31, 4
-  store i32 %32, i32* %29, align 4, !tbaa !2
+  store i32 %32, ptr %29, align 4, !tbaa !2
   ret void
 }
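
The pattern repeated throughout these hunks: typed pointer types such as i32* become the opaque ptr, and the pointer bitcasts that existed only to retype a memory access drop out entirely, so the vectorized store goes straight through the incoming pointer. A minimal before/after sketch of that shape (illustrative IR in the style of these tests, not taken verbatim from any one of them):

  ; Typed pointers: storing a vector through an i32* needs an explicit retype.
  define void @example_typed(i32* %dst, <8 x i32> %v) {
    %cast = bitcast i32* %dst to <8 x i32>*
    store <8 x i32> %v, <8 x i32>* %cast, align 4
    ret void
  }

  ; Opaque pointers: loads and stores carry their own type, so the bitcast folds away.
  define void @example_opaque(ptr %dst, <8 x i32> %v) {
    store <8 x i32> %v, ptr %dst, align 4
    ret void
  }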
 
-define void @gather_load_4(i32* noalias nocapture %t0, i32* noalias nocapture readonly %t1) {
+define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture readonly %t1) {
 ; SSE-LABEL: @gather_load_4(
-; SSE-NEXT:    [[T5:%.*]] = getelementptr inbounds i32, i32* [[T0:%.*]], i64 1
-; SSE-NEXT:    [[T6:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 11
-; SSE-NEXT:    [[T9:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 2
-; SSE-NEXT:    [[T10:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 4
-; SSE-NEXT:    [[T13:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 3
-; SSE-NEXT:    [[T14:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 15
-; SSE-NEXT:    [[T17:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 4
-; SSE-NEXT:    [[T18:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 18
-; SSE-NEXT:    [[T21:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 5
-; SSE-NEXT:    [[T22:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 9
-; SSE-NEXT:    [[T25:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 6
-; SSE-NEXT:    [[T26:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 6
-; SSE-NEXT:    [[T29:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 7
-; SSE-NEXT:    [[T30:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 21
-; SSE-NEXT:    [[T3:%.*]] = load i32, i32* [[T1]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[T7:%.*]] = load i32, i32* [[T6]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[T11:%.*]] = load i32, i32* [[T10]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[T15:%.*]] = load i32, i32* [[T14]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[T19:%.*]] = load i32, i32* [[T18]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[T23:%.*]] = load i32, i32* [[T22]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[T27:%.*]] = load i32, i32* [[T26]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[T31:%.*]] = load i32, i32* [[T30]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[T5:%.*]] = getelementptr inbounds i32, ptr [[T0:%.*]], i64 1
+; SSE-NEXT:    [[T6:%.*]] = getelementptr inbounds i32, ptr [[T1:%.*]], i64 11
+; SSE-NEXT:    [[T9:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 2
+; SSE-NEXT:    [[T10:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 4
+; SSE-NEXT:    [[T13:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 3
+; SSE-NEXT:    [[T14:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 15
+; SSE-NEXT:    [[T17:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 4
+; SSE-NEXT:    [[T18:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 18
+; SSE-NEXT:    [[T21:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 5
+; SSE-NEXT:    [[T22:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 9
+; SSE-NEXT:    [[T25:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 6
+; SSE-NEXT:    [[T26:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 6
+; SSE-NEXT:    [[T29:%.*]] = getelementptr inbounds i32, ptr [[T0]], i64 7
+; SSE-NEXT:    [[T30:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 21
+; SSE-NEXT:    [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[T4:%.*]] = add i32 [[T3]], 1
 ; SSE-NEXT:    [[T8:%.*]] = add i32 [[T7]], 2
 ; SSE-NEXT:    [[T12:%.*]] = add i32 [[T11]], 3
@@ -407,32 +393,32 @@ define void @gather_load_4(i32* noalias nocapture %t0, i32* noalias nocapture re
 ; SSE-NEXT:    [[T24:%.*]] = add i32 [[T23]], 2
 ; SSE-NEXT:    [[T28:%.*]] = add i32 [[T27]], 3
 ; SSE-NEXT:    [[T32:%.*]] = add i32 [[T31]], 4
-; SSE-NEXT:    store i32 [[T4]], i32* [[T0]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    store i32 [[T8]], i32* [[T5]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    store i32 [[T12]], i32* [[T9]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    store i32 [[T16]], i32* [[T13]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    store i32 [[T20]], i32* [[T17]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    store i32 [[T24]], i32* [[T21]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    store i32 [[T28]], i32* [[T25]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    store i32 [[T32]], i32* [[T29]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store i32 [[T4]], ptr [[T0]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store i32 [[T8]], ptr [[T5]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store i32 [[T12]], ptr [[T9]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store i32 [[T16]], ptr [[T13]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store i32 [[T20]], ptr [[T17]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store i32 [[T24]], ptr [[T21]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store i32 [[T28]], ptr [[T25]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store i32 [[T32]], ptr [[T29]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @gather_load_4(
-; AVX-NEXT:    [[T6:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 11
-; AVX-NEXT:    [[T10:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 4
-; AVX-NEXT:    [[T14:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 15
-; AVX-NEXT:    [[T18:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 18
-; AVX-NEXT:    [[T22:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 9
-; AVX-NEXT:    [[T26:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 6
-; AVX-NEXT:    [[T30:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 21
-; AVX-NEXT:    [[T3:%.*]] = load i32, i32* [[T1]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[T7:%.*]] = load i32, i32* [[T6]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[T11:%.*]] = load i32, i32* [[T10]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[T15:%.*]] = load i32, i32* [[T14]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[T19:%.*]] = load i32, i32* [[T18]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[T23:%.*]] = load i32, i32* [[T22]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[T27:%.*]] = load i32, i32* [[T26]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[T31:%.*]] = load i32, i32* [[T30]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[T6:%.*]] = getelementptr inbounds i32, ptr [[T1:%.*]], i64 11
+; AVX-NEXT:    [[T10:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 4
+; AVX-NEXT:    [[T14:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 15
+; AVX-NEXT:    [[T18:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 18
+; AVX-NEXT:    [[T22:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 9
+; AVX-NEXT:    [[T26:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 6
+; AVX-NEXT:    [[T30:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 21
+; AVX-NEXT:    [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[T3]], i64 0
 ; AVX-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[T7]], i64 1
 ; AVX-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[T11]], i64 2
@@ -442,26 +428,25 @@ define void @gather_load_4(i32* noalias nocapture %t0, i32* noalias nocapture re
 ; AVX-NEXT:    [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T27]], i64 6
 ; AVX-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[T31]], i64 7
 ; AVX-NEXT:    [[TMP9:%.*]] = add <8 x i32> [[TMP8]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
-; AVX-NEXT:    [[TMP10:%.*]] = bitcast i32* [[T0:%.*]] to <8 x i32>*
-; AVX-NEXT:    store <8 x i32> [[TMP9]], <8 x i32>* [[TMP10]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    store <8 x i32> [[TMP9]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    ret void
 ;
 ; AVX2-LABEL: @gather_load_4(
-; AVX2-NEXT:    [[T6:%.*]] = getelementptr inbounds i32, i32* [[T1:%.*]], i64 11
-; AVX2-NEXT:    [[T10:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 4
-; AVX2-NEXT:    [[T14:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 15
-; AVX2-NEXT:    [[T18:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 18
-; AVX2-NEXT:    [[T22:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 9
-; AVX2-NEXT:    [[T26:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 6
-; AVX2-NEXT:    [[T30:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 21
-; AVX2-NEXT:    [[T3:%.*]] = load i32, i32* [[T1]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[T7:%.*]] = load i32, i32* [[T6]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[T11:%.*]] = load i32, i32* [[T10]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[T15:%.*]] = load i32, i32* [[T14]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[T19:%.*]] = load i32, i32* [[T18]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[T23:%.*]] = load i32, i32* [[T22]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[T27:%.*]] = load i32, i32* [[T26]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[T31:%.*]] = load i32, i32* [[T30]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[T6:%.*]] = getelementptr inbounds i32, ptr [[T1:%.*]], i64 11
+; AVX2-NEXT:    [[T10:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 4
+; AVX2-NEXT:    [[T14:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 15
+; AVX2-NEXT:    [[T18:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 18
+; AVX2-NEXT:    [[T22:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 9
+; AVX2-NEXT:    [[T26:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 6
+; AVX2-NEXT:    [[T30:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 21
+; AVX2-NEXT:    [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[T3]], i64 0
 ; AVX2-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[T7]], i64 1
 ; AVX2-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[T11]], i64 2
@@ -471,53 +456,50 @@ define void @gather_load_4(i32* noalias nocapture %t0, i32* noalias nocapture re
 ; AVX2-NEXT:    [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T27]], i64 6
 ; AVX2-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[T31]], i64 7
 ; AVX2-NEXT:    [[TMP9:%.*]] = add <8 x i32> [[TMP8]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
-; AVX2-NEXT:    [[TMP10:%.*]] = bitcast i32* [[T0:%.*]] to <8 x i32>*
-; AVX2-NEXT:    store <8 x i32> [[TMP9]], <8 x i32>* [[TMP10]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    store <8 x i32> [[TMP9]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512F-LABEL: @gather_load_4(
-; AVX512F-NEXT:    [[TMP1:%.*]] = insertelement <8 x i32*> poison, i32* [[T1:%.*]], i64 0
-; AVX512F-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32*> [[TMP1]], <8 x i32*> poison, <8 x i32> zeroinitializer
-; AVX512F-NEXT:    [[TMP2:%.*]] = getelementptr i32, <8 x i32*> [[SHUFFLE]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
-; AVX512F-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[TMP2]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison), !tbaa [[TBAA0]]
+; AVX512F-NEXT:    [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0
+; AVX512F-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer
+; AVX512F-NEXT:    [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
+; AVX512F-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison), !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[TMP3]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
-; AVX512F-NEXT:    [[TMP5:%.*]] = bitcast i32* [[T0:%.*]] to <8 x i32>*
-; AVX512F-NEXT:    store <8 x i32> [[TMP4]], <8 x i32>* [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT:    store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    ret void
 ;
 ; AVX512VL-LABEL: @gather_load_4(
-; AVX512VL-NEXT:    [[TMP1:%.*]] = insertelement <8 x i32*> poison, i32* [[T1:%.*]], i64 0
-; AVX512VL-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32*> [[TMP1]], <8 x i32*> poison, <8 x i32> zeroinitializer
-; AVX512VL-NEXT:    [[TMP2:%.*]] = getelementptr i32, <8 x i32*> [[SHUFFLE]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
-; AVX512VL-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[TMP2]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison), !tbaa [[TBAA0]]
+; AVX512VL-NEXT:    [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0
+; AVX512VL-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer
+; AVX512VL-NEXT:    [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
+; AVX512VL-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison), !tbaa [[TBAA0]]
 ; AVX512VL-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[TMP3]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
-; AVX512VL-NEXT:    [[TMP5:%.*]] = bitcast i32* [[T0:%.*]] to <8 x i32>*
-; AVX512VL-NEXT:    store <8 x i32> [[TMP4]], <8 x i32>* [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT:    store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX512VL-NEXT:    ret void
 ;
-  %t5 = getelementptr inbounds i32, i32* %t0, i64 1
-  %t6 = getelementptr inbounds i32, i32* %t1, i64 11
-  %t9 = getelementptr inbounds i32, i32* %t0, i64 2
-  %t10 = getelementptr inbounds i32, i32* %t1, i64 4
-  %t13 = getelementptr inbounds i32, i32* %t0, i64 3
-  %t14 = getelementptr inbounds i32, i32* %t1, i64 15
-  %t17 = getelementptr inbounds i32, i32* %t0, i64 4
-  %t18 = getelementptr inbounds i32, i32* %t1, i64 18
-  %t21 = getelementptr inbounds i32, i32* %t0, i64 5
-  %t22 = getelementptr inbounds i32, i32* %t1, i64 9
-  %t25 = getelementptr inbounds i32, i32* %t0, i64 6
-  %t26 = getelementptr inbounds i32, i32* %t1, i64 6
-  %t29 = getelementptr inbounds i32, i32* %t0, i64 7
-  %t30 = getelementptr inbounds i32, i32* %t1, i64 21
+  %t5 = getelementptr inbounds i32, ptr %t0, i64 1
+  %t6 = getelementptr inbounds i32, ptr %t1, i64 11
+  %t9 = getelementptr inbounds i32, ptr %t0, i64 2
+  %t10 = getelementptr inbounds i32, ptr %t1, i64 4
+  %t13 = getelementptr inbounds i32, ptr %t0, i64 3
+  %t14 = getelementptr inbounds i32, ptr %t1, i64 15
+  %t17 = getelementptr inbounds i32, ptr %t0, i64 4
+  %t18 = getelementptr inbounds i32, ptr %t1, i64 18
+  %t21 = getelementptr inbounds i32, ptr %t0, i64 5
+  %t22 = getelementptr inbounds i32, ptr %t1, i64 9
+  %t25 = getelementptr inbounds i32, ptr %t0, i64 6
+  %t26 = getelementptr inbounds i32, ptr %t1, i64 6
+  %t29 = getelementptr inbounds i32, ptr %t0, i64 7
+  %t30 = getelementptr inbounds i32, ptr %t1, i64 21
 
-  %t3 = load i32, i32* %t1, align 4, !tbaa !2
-  %t7 = load i32, i32* %t6, align 4, !tbaa !2
-  %t11 = load i32, i32* %t10, align 4, !tbaa !2
-  %t15 = load i32, i32* %t14, align 4, !tbaa !2
-  %t19 = load i32, i32* %t18, align 4, !tbaa !2
-  %t23 = load i32, i32* %t22, align 4, !tbaa !2
-  %t27 = load i32, i32* %t26, align 4, !tbaa !2
-  %t31 = load i32, i32* %t30, align 4, !tbaa !2
+  %t3 = load i32, ptr %t1, align 4, !tbaa !2
+  %t7 = load i32, ptr %t6, align 4, !tbaa !2
+  %t11 = load i32, ptr %t10, align 4, !tbaa !2
+  %t15 = load i32, ptr %t14, align 4, !tbaa !2
+  %t19 = load i32, ptr %t18, align 4, !tbaa !2
+  %t23 = load i32, ptr %t22, align 4, !tbaa !2
+  %t27 = load i32, ptr %t26, align 4, !tbaa !2
+  %t31 = load i32, ptr %t30, align 4, !tbaa !2
 
   %t4 = add i32 %t3, 1
   %t8 = add i32 %t7, 2
@@ -528,37 +510,37 @@ define void @gather_load_4(i32* noalias nocapture %t0, i32* noalias nocapture re
   %t28 = add i32 %t27, 3
   %t32 = add i32 %t31, 4
 
-  store i32 %t4, i32* %t0, align 4, !tbaa !2
-  store i32 %t8, i32* %t5, align 4, !tbaa !2
-  store i32 %t12, i32* %t9, align 4, !tbaa !2
-  store i32 %t16, i32* %t13, align 4, !tbaa !2
-  store i32 %t20, i32* %t17, align 4, !tbaa !2
-  store i32 %t24, i32* %t21, align 4, !tbaa !2
-  store i32 %t28, i32* %t25, align 4, !tbaa !2
-  store i32 %t32, i32* %t29, align 4, !tbaa !2
+  store i32 %t4, ptr %t0, align 4, !tbaa !2
+  store i32 %t8, ptr %t5, align 4, !tbaa !2
+  store i32 %t12, ptr %t9, align 4, !tbaa !2
+  store i32 %t16, ptr %t13, align 4, !tbaa !2
+  store i32 %t20, ptr %t17, align 4, !tbaa !2
+  store i32 %t24, ptr %t21, align 4, !tbaa !2
+  store i32 %t28, ptr %t25, align 4, !tbaa !2
+  store i32 %t32, ptr %t29, align 4, !tbaa !2
 
   ret void
 }
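
The overloaded intrinsic manglings change in the same way: once the pointee type no longer exists, the pointer suffix collapses to the address space alone, so @llvm.masked.gather.v8i32.v8p0i32 becomes @llvm.masked.gather.v8i32.v8p0 and its operand type <8 x i32*> becomes <8 x ptr>. A minimal sketch of the converted gather, mirroring the AVX512F/AVX512VL check lines above (%splat and %ptrs are hypothetical names standing in for the [[SHUFFLE]] and [[TMP2]] values):

  %ptrs = getelementptr i32, <8 x ptr> %splat, <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21>
  %g = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)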
 
 
-define void @gather_load_div(float* noalias nocapture %0, float* noalias nocapture readonly %1) {
+define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
 ; SSE-LABEL: @gather_load_div(
-; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP1:%.*]], i64 4
-; SSE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 10
-; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 13
-; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 3
-; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 11
-; SSE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 14
-; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 44
-; SSE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP0:%.*]], i64 4
-; SSE-NEXT:    [[TMP11:%.*]] = load float, float* [[TMP1]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP12:%.*]] = load float, float* [[TMP3]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP13:%.*]] = load float, float* [[TMP4]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP14:%.*]] = load float, float* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP15:%.*]] = load float, float* [[TMP6]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP16:%.*]] = load float, float* [[TMP7]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP17:%.*]] = load float, float* [[TMP8]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP18:%.*]] = load float, float* [[TMP9]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1:%.*]], i64 4
+; SSE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 10
+; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 13
+; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 3
+; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 11
+; SSE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 14
+; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 44
+; SSE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP0:%.*]], i64 4
+; SSE-NEXT:    [[TMP11:%.*]] = load float, ptr [[TMP1]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP12:%.*]] = load float, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP13:%.*]] = load float, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP14:%.*]] = load float, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP15:%.*]] = load float, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP16:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP17:%.*]] = load float, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP18:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP19:%.*]] = insertelement <4 x float> poison, float [[TMP11]], i64 0
 ; SSE-NEXT:    [[TMP20:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP13]], i64 1
 ; SSE-NEXT:    [[TMP21:%.*]] = insertelement <4 x float> [[TMP20]], float [[TMP15]], i64 2
@@ -568,24 +550,23 @@ define void @gather_load_div(float* noalias nocapture %0, float* noalias nocaptu
 ; SSE-NEXT:    [[TMP25:%.*]] = insertelement <4 x float> [[TMP24]], float [[TMP16]], i64 2
 ; SSE-NEXT:    [[TMP26:%.*]] = insertelement <4 x float> [[TMP25]], float [[TMP18]], i64 3
 ; SSE-NEXT:    [[TMP27:%.*]] = fdiv <4 x float> [[TMP22]], [[TMP26]]
-; SSE-NEXT:    [[TMP28:%.*]] = bitcast float* [[TMP0]] to <4 x float>*
-; SSE-NEXT:    store <4 x float> [[TMP27]], <4 x float>* [[TMP28]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP29:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 17
-; SSE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 33
-; SSE-NEXT:    [[TMP31:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 8
-; SSE-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 30
-; SSE-NEXT:    [[TMP33:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 5
-; SSE-NEXT:    [[TMP34:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 27
-; SSE-NEXT:    [[TMP35:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 20
-; SSE-NEXT:    [[TMP36:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 23
-; SSE-NEXT:    [[TMP37:%.*]] = load float, float* [[TMP29]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP38:%.*]] = load float, float* [[TMP30]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP39:%.*]] = load float, float* [[TMP31]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP40:%.*]] = load float, float* [[TMP32]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP41:%.*]] = load float, float* [[TMP33]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP42:%.*]] = load float, float* [[TMP34]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP43:%.*]] = load float, float* [[TMP35]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP44:%.*]] = load float, float* [[TMP36]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store <4 x float> [[TMP27]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17
+; SSE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33
+; SSE-NEXT:    [[TMP31:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8
+; SSE-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30
+; SSE-NEXT:    [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5
+; SSE-NEXT:    [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27
+; SSE-NEXT:    [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20
+; SSE-NEXT:    [[TMP36:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23
+; SSE-NEXT:    [[TMP37:%.*]] = load float, ptr [[TMP29]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP38:%.*]] = load float, ptr [[TMP30]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP39:%.*]] = load float, ptr [[TMP31]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP40:%.*]] = load float, ptr [[TMP32]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP41:%.*]] = load float, ptr [[TMP33]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP42:%.*]] = load float, ptr [[TMP34]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP43:%.*]] = load float, ptr [[TMP35]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    [[TMP44:%.*]] = load float, ptr [[TMP36]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP45:%.*]] = insertelement <4 x float> poison, float [[TMP37]], i64 0
 ; SSE-NEXT:    [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP39]], i64 1
 ; SSE-NEXT:    [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP41]], i64 2
@@ -595,42 +576,41 @@ define void @gather_load_div(float* noalias nocapture %0, float* noalias nocaptu
 ; SSE-NEXT:    [[TMP51:%.*]] = insertelement <4 x float> [[TMP50]], float [[TMP42]], i64 2
 ; SSE-NEXT:    [[TMP52:%.*]] = insertelement <4 x float> [[TMP51]], float [[TMP44]], i64 3
 ; SSE-NEXT:    [[TMP53:%.*]] = fdiv <4 x float> [[TMP48]], [[TMP52]]
-; SSE-NEXT:    [[TMP54:%.*]] = bitcast float* [[TMP10]] to <4 x float>*
-; SSE-NEXT:    store <4 x float> [[TMP53]], <4 x float>* [[TMP54]], align 4, !tbaa [[TBAA0]]
+; SSE-NEXT:    store <4 x float> [[TMP53]], ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @gather_load_div(
-; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP1:%.*]], i64 4
-; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 10
-; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 13
-; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 3
-; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 11
-; AVX-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 14
-; AVX-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 44
-; AVX-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 17
-; AVX-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 33
-; AVX-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 8
-; AVX-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 30
-; AVX-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 5
-; AVX-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 27
-; AVX-NEXT:    [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 20
-; AVX-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 23
-; AVX-NEXT:    [[TMP18:%.*]] = load float, float* [[TMP1]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP19:%.*]] = load float, float* [[TMP3]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP20:%.*]] = load float, float* [[TMP4]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP21:%.*]] = load float, float* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP22:%.*]] = load float, float* [[TMP6]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP23:%.*]] = load float, float* [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP24:%.*]] = load float, float* [[TMP8]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP25:%.*]] = load float, float* [[TMP9]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP26:%.*]] = load float, float* [[TMP10]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP27:%.*]] = load float, float* [[TMP11]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP28:%.*]] = load float, float* [[TMP12]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP29:%.*]] = load float, float* [[TMP13]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP30:%.*]] = load float, float* [[TMP14]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP31:%.*]] = load float, float* [[TMP15]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP32:%.*]] = load float, float* [[TMP16]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP33:%.*]] = load float, float* [[TMP17]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1:%.*]], i64 4
+; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 10
+; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 13
+; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 3
+; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 11
+; AVX-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 14
+; AVX-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 44
+; AVX-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17
+; AVX-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33
+; AVX-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8
+; AVX-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30
+; AVX-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5
+; AVX-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27
+; AVX-NEXT:    [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20
+; AVX-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23
+; AVX-NEXT:    [[TMP18:%.*]] = load float, ptr [[TMP1]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP19:%.*]] = load float, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP20:%.*]] = load float, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP21:%.*]] = load float, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP22:%.*]] = load float, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP23:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP24:%.*]] = load float, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP25:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP26:%.*]] = load float, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP27:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP28:%.*]] = load float, ptr [[TMP12]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP29:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP30:%.*]] = load float, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP31:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP32:%.*]] = load float, ptr [[TMP16]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    [[TMP33:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[TMP34:%.*]] = insertelement <8 x float> poison, float [[TMP18]], i64 0
 ; AVX-NEXT:    [[TMP35:%.*]] = insertelement <8 x float> [[TMP34]], float [[TMP20]], i64 1
 ; AVX-NEXT:    [[TMP36:%.*]] = insertelement <8 x float> [[TMP35]], float [[TMP22]], i64 2
@@ -648,42 +628,41 @@ define void @gather_load_div(float* noalias nocapture %0, float* noalias nocaptu
 ; AVX-NEXT:    [[TMP48:%.*]] = insertelement <8 x float> [[TMP47]], float [[TMP31]], i64 6
 ; AVX-NEXT:    [[TMP49:%.*]] = insertelement <8 x float> [[TMP48]], float [[TMP33]], i64 7
 ; AVX-NEXT:    [[TMP50:%.*]] = fdiv <8 x float> [[TMP41]], [[TMP49]]
-; AVX-NEXT:    [[TMP51:%.*]] = bitcast float* [[TMP0:%.*]] to <8 x float>*
-; AVX-NEXT:    store <8 x float> [[TMP50]], <8 x float>* [[TMP51]], align 4, !tbaa [[TBAA0]]
+; AVX-NEXT:    store <8 x float> [[TMP50]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    ret void
 ;
 ; AVX2-LABEL: @gather_load_div(
-; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP1:%.*]], i64 4
-; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 10
-; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 13
-; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 3
-; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 11
-; AVX2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 14
-; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 44
-; AVX2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 17
-; AVX2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 33
-; AVX2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 8
-; AVX2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 30
-; AVX2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 5
-; AVX2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 27
-; AVX2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 20
-; AVX2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 23
-; AVX2-NEXT:    [[TMP18:%.*]] = load float, float* [[TMP1]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP19:%.*]] = load float, float* [[TMP3]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP20:%.*]] = load float, float* [[TMP4]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP21:%.*]] = load float, float* [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP22:%.*]] = load float, float* [[TMP6]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP23:%.*]] = load float, float* [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP24:%.*]] = load float, float* [[TMP8]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP25:%.*]] = load float, float* [[TMP9]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP26:%.*]] = load float, float* [[TMP10]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP27:%.*]] = load float, float* [[TMP11]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP28:%.*]] = load float, float* [[TMP12]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP29:%.*]] = load float, float* [[TMP13]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP30:%.*]] = load float, float* [[TMP14]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP31:%.*]] = load float, float* [[TMP15]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP32:%.*]] = load float, float* [[TMP16]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP33:%.*]] = load float, float* [[TMP17]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1:%.*]], i64 4
+; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 10
+; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 13
+; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 3
+; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 11
+; AVX2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 14
+; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 44
+; AVX2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 17
+; AVX2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 33
+; AVX2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8
+; AVX2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 30
+; AVX2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 5
+; AVX2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 27
+; AVX2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 20
+; AVX2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 23
+; AVX2-NEXT:    [[TMP18:%.*]] = load float, ptr [[TMP1]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP19:%.*]] = load float, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP20:%.*]] = load float, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP21:%.*]] = load float, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP22:%.*]] = load float, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP23:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP24:%.*]] = load float, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP25:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP26:%.*]] = load float, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP27:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP28:%.*]] = load float, ptr [[TMP12]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP29:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP30:%.*]] = load float, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP31:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP32:%.*]] = load float, ptr [[TMP16]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    [[TMP33:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[TMP34:%.*]] = insertelement <8 x float> poison, float [[TMP18]], i64 0
 ; AVX2-NEXT:    [[TMP35:%.*]] = insertelement <8 x float> [[TMP34]], float [[TMP20]], i64 1
 ; AVX2-NEXT:    [[TMP36:%.*]] = insertelement <8 x float> [[TMP35]], float [[TMP22]], i64 2
@@ -701,110 +680,106 @@ define void @gather_load_div(float* noalias nocapture %0, float* noalias nocaptu
 ; AVX2-NEXT:    [[TMP48:%.*]] = insertelement <8 x float> [[TMP47]], float [[TMP31]], i64 6
 ; AVX2-NEXT:    [[TMP49:%.*]] = insertelement <8 x float> [[TMP48]], float [[TMP33]], i64 7
 ; AVX2-NEXT:    [[TMP50:%.*]] = fdiv <8 x float> [[TMP41]], [[TMP49]]
-; AVX2-NEXT:    [[TMP51:%.*]] = bitcast float* [[TMP0:%.*]] to <8 x float>*
-; AVX2-NEXT:    store <8 x float> [[TMP50]], <8 x float>* [[TMP51]], align 4, !tbaa [[TBAA0]]
+; AVX2-NEXT:    store <8 x float> [[TMP50]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512F-LABEL: @gather_load_div(
-; AVX512F-NEXT:    [[TMP3:%.*]] = insertelement <8 x float*> poison, float* [[TMP1:%.*]], i64 0
-; AVX512F-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <8 x float*> [[TMP3]], <8 x float*> poison, <8 x i32> zeroinitializer
-; AVX512F-NEXT:    [[TMP4:%.*]] = getelementptr float, <8 x float*> [[SHUFFLE1]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
-; AVX512F-NEXT:    [[TMP5:%.*]] = getelementptr float, <8 x float*> [[SHUFFLE1]], <8 x i64> <i64 0, i64 10, i64 3, i64 14, i64 17, i64 8, i64 5, i64 20>
-; AVX512F-NEXT:    [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP5]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison), !tbaa [[TBAA0]]
-; AVX512F-NEXT:    [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP4]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison), !tbaa [[TBAA0]]
+; AVX512F-NEXT:    [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0
+; AVX512F-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer
+; AVX512F-NEXT:    [[TMP4:%.*]] = getelementptr float, <8 x ptr> [[SHUFFLE1]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
+; AVX512F-NEXT:    [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[SHUFFLE1]], <8 x i64> <i64 0, i64 10, i64 3, i64 14, i64 17, i64 8, i64 5, i64 20>
+; AVX512F-NEXT:    [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison), !tbaa [[TBAA0]]
+; AVX512F-NEXT:    [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison), !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    [[TMP8:%.*]] = fdiv <8 x float> [[TMP6]], [[TMP7]]
-; AVX512F-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP0:%.*]] to <8 x float>*
-; AVX512F-NEXT:    store <8 x float> [[TMP8]], <8 x float>* [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX512F-NEXT:    store <8 x float> [[TMP8]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    ret void
 ;
 ; AVX512VL-LABEL: @gather_load_div(
-; AVX512VL-NEXT:    [[TMP3:%.*]] = insertelement <8 x float*> poison, float* [[TMP1:%.*]], i64 0
-; AVX512VL-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <8 x float*> [[TMP3]], <8 x float*> poison, <8 x i32> zeroinitializer
-; AVX512VL-NEXT:    [[TMP4:%.*]] = getelementptr float, <8 x float*> [[SHUFFLE1]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
-; AVX512VL-NEXT:    [[TMP5:%.*]] = getelementptr float, <8 x float*> [[SHUFFLE1]], <8 x i64> <i64 0, i64 10, i64 3, i64 14, i64 17, i64 8, i64 5, i64 20>
-; AVX512VL-NEXT:    [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP5]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison), !tbaa [[TBAA0]]
-; AVX512VL-NEXT:    [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP4]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison), !tbaa [[TBAA0]]
+; AVX512VL-NEXT:    [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0
+; AVX512VL-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer
+; AVX512VL-NEXT:    [[TMP4:%.*]] = getelementptr float, <8 x ptr> [[SHUFFLE1]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
+; AVX512VL-NEXT:    [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[SHUFFLE1]], <8 x i64> <i64 0, i64 10, i64 3, i64 14, i64 17, i64 8, i64 5, i64 20>
+; AVX512VL-NEXT:    [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison), !tbaa [[TBAA0]]
+; AVX512VL-NEXT:    [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison), !tbaa [[TBAA0]]
 ; AVX512VL-NEXT:    [[TMP8:%.*]] = fdiv <8 x float> [[TMP6]], [[TMP7]]
-; AVX512VL-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP0:%.*]] to <8 x float>*
-; AVX512VL-NEXT:    store <8 x float> [[TMP8]], <8 x float>* [[TMP9]], align 4, !tbaa [[TBAA0]]
+; AVX512VL-NEXT:    store <8 x float> [[TMP8]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX512VL-NEXT:    ret void
 ;
 ; AVX512-LABEL: @gather_load_div(
-; AVX512-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP1:%.*]], i64 10
-; AVX512-NEXT:    [[TMP4:%.*]] = insertelement <2 x float*> poison, float* [[TMP1]], i32 0
-; AVX512-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float*> [[TMP4]], <2 x float*> poison, <2 x i32> zeroinitializer
-; AVX512-NEXT:    [[TMP6:%.*]] = getelementptr float, <2 x float*> [[TMP5]], <2 x i64> <i64 3, i64 14>
-; AVX512-NEXT:    [[TMP7:%.*]] = insertelement <4 x float*> poison, float* [[TMP1]], i32 0
-; AVX512-NEXT:    [[TMP8:%.*]] = shufflevector <4 x float*> [[TMP7]], <4 x float*> poison, <4 x i32> zeroinitializer
-; AVX512-NEXT:    [[TMP9:%.*]] = getelementptr float, <4 x float*> [[TMP8]], <4 x i64> <i64 17, i64 8, i64 5, i64 20>
-; AVX512-NEXT:    [[TMP10:%.*]] = insertelement <8 x float*> poison, float* [[TMP1]], i32 0
-; AVX512-NEXT:    [[TMP11:%.*]] = insertelement <8 x float*> [[TMP10]], float* [[TMP3]], i32 1
-; AVX512-NEXT:    [[TMP12:%.*]] = shufflevector <2 x float*> [[TMP6]], <2 x float*> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; AVX512-NEXT:    [[TMP13:%.*]] = shufflevector <8 x float*> [[TMP11]], <8 x float*> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
-; AVX512-NEXT:    [[TMP14:%.*]] = shufflevector <4 x float*> [[TMP9]], <4 x float*> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-; AVX512-NEXT:    [[TMP15:%.*]] = shufflevector <8 x float*> [[TMP13]], <8 x float*> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-; AVX512-NEXT:    [[TMP16:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP15]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
-; AVX512-NEXT:    [[TMP17:%.*]] = shufflevector <8 x float*> [[TMP10]], <8 x float*> poison, <8 x i32> zeroinitializer
-; AVX512-NEXT:    [[TMP18:%.*]] = getelementptr float, <8 x float*> [[TMP17]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
-; AVX512-NEXT:    [[TMP19:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP18]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
+; AVX512-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1:%.*]], i64 10
+; AVX512-NEXT:    [[TMP4:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP1]], i32 0
+; AVX512-NEXT:    [[TMP5:%.*]] = shufflevector <2 x ptr> [[TMP4]], <2 x ptr> poison, <2 x i32> zeroinitializer
+; AVX512-NEXT:    [[TMP6:%.*]] = getelementptr float, <2 x ptr> [[TMP5]], <2 x i64> <i64 3, i64 14>
+; AVX512-NEXT:    [[TMP7:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1]], i32 0
+; AVX512-NEXT:    [[TMP8:%.*]] = shufflevector <4 x ptr> [[TMP7]], <4 x ptr> poison, <4 x i32> zeroinitializer
+; AVX512-NEXT:    [[TMP9:%.*]] = getelementptr float, <4 x ptr> [[TMP8]], <4 x i64> <i64 17, i64 8, i64 5, i64 20>
+; AVX512-NEXT:    [[TMP10:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1]], i32 0
+; AVX512-NEXT:    [[TMP11:%.*]] = insertelement <8 x ptr> [[TMP10]], ptr [[TMP3]], i32 1
+; AVX512-NEXT:    [[TMP12:%.*]] = shufflevector <2 x ptr> [[TMP6]], <2 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; AVX512-NEXT:    [[TMP13:%.*]] = shufflevector <8 x ptr> [[TMP11]], <8 x ptr> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
+; AVX512-NEXT:    [[TMP14:%.*]] = shufflevector <4 x ptr> [[TMP9]], <4 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; AVX512-NEXT:    [[TMP15:%.*]] = shufflevector <8 x ptr> [[TMP13]], <8 x ptr> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; AVX512-NEXT:    [[TMP16:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP15]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
+; AVX512-NEXT:    [[TMP17:%.*]] = shufflevector <8 x ptr> [[TMP10]], <8 x ptr> poison, <8 x i32> zeroinitializer
+; AVX512-NEXT:    [[TMP18:%.*]] = getelementptr float, <8 x ptr> [[TMP17]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23>
+; AVX512-NEXT:    [[TMP19:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP18]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef), !tbaa [[TBAA0]]
 ; AVX512-NEXT:    [[TMP20:%.*]] = fdiv <8 x float> [[TMP16]], [[TMP19]]
-; AVX512-NEXT:    [[TMP21:%.*]] = bitcast float* [[TMP0:%.*]] to <8 x float>*
-; AVX512-NEXT:    store <8 x float> [[TMP20]], <8 x float>* [[TMP21]], align 4, !tbaa [[TBAA0]]
+; AVX512-NEXT:    store <8 x float> [[TMP20]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]]
 ; AVX512-NEXT:    ret void
-  %3 = load float, float* %1, align 4, !tbaa !2
-  %4 = getelementptr inbounds float, float* %1, i64 4
-  %5 = load float, float* %4, align 4, !tbaa !2
+  %3 = load float, ptr %1, align 4, !tbaa !2
+  %4 = getelementptr inbounds float, ptr %1, i64 4
+  %5 = load float, ptr %4, align 4, !tbaa !2
   %6 = fdiv float %3, %5
-  %7 = getelementptr inbounds float, float* %0, i64 1
-  store float %6, float* %0, align 4, !tbaa !2
-  %8 = getelementptr inbounds float, float* %1, i64 10
-  %9 = load float, float* %8, align 4, !tbaa !2
-  %10 = getelementptr inbounds float, float* %1, i64 13
-  %11 = load float, float* %10, align 4, !tbaa !2
+  %7 = getelementptr inbounds float, ptr %0, i64 1
+  store float %6, ptr %0, align 4, !tbaa !2
+  %8 = getelementptr inbounds float, ptr %1, i64 10
+  %9 = load float, ptr %8, align 4, !tbaa !2
+  %10 = getelementptr inbounds float, ptr %1, i64 13
+  %11 = load float, ptr %10, align 4, !tbaa !2
   %12 = fdiv float %9, %11
-  %13 = getelementptr inbounds float, float* %0, i64 2
-  store float %12, float* %7, align 4, !tbaa !2
-  %14 = getelementptr inbounds float, float* %1, i64 3
-  %15 = load float, float* %14, align 4, !tbaa !2
-  %16 = getelementptr inbounds float, float* %1, i64 11
-  %17 = load float, float* %16, align 4, !tbaa !2
+  %13 = getelementptr inbounds float, ptr %0, i64 2
+  store float %12, ptr %7, align 4, !tbaa !2
+  %14 = getelementptr inbounds float, ptr %1, i64 3
+  %15 = load float, ptr %14, align 4, !tbaa !2
+  %16 = getelementptr inbounds float, ptr %1, i64 11
+  %17 = load float, ptr %16, align 4, !tbaa !2
   %18 = fdiv float %15, %17
-  %19 = getelementptr inbounds float, float* %0, i64 3
-  store float %18, float* %13, align 4, !tbaa !2
-  %20 = getelementptr inbounds float, float* %1, i64 14
-  %21 = load float, float* %20, align 4, !tbaa !2
-  %22 = getelementptr inbounds float, float* %1, i64 44
-  %23 = load float, float* %22, align 4, !tbaa !2
+  %19 = getelementptr inbounds float, ptr %0, i64 3
+  store float %18, ptr %13, align 4, !tbaa !2
+  %20 = getelementptr inbounds float, ptr %1, i64 14
+  %21 = load float, ptr %20, align 4, !tbaa !2
+  %22 = getelementptr inbounds float, ptr %1, i64 44
+  %23 = load float, ptr %22, align 4, !tbaa !2
   %24 = fdiv float %21, %23
-  %25 = getelementptr inbounds float, float* %0, i64 4
-  store float %24, float* %19, align 4, !tbaa !2
-  %26 = getelementptr inbounds float, float* %1, i64 17
-  %27 = load float, float* %26, align 4, !tbaa !2
-  %28 = getelementptr inbounds float, float* %1, i64 33
-  %29 = load float, float* %28, align 4, !tbaa !2
+  %25 = getelementptr inbounds float, ptr %0, i64 4
+  store float %24, ptr %19, align 4, !tbaa !2
+  %26 = getelementptr inbounds float, ptr %1, i64 17
+  %27 = load float, ptr %26, align 4, !tbaa !2
+  %28 = getelementptr inbounds float, ptr %1, i64 33
+  %29 = load float, ptr %28, align 4, !tbaa !2
   %30 = fdiv float %27, %29
-  %31 = getelementptr inbounds float, float* %0, i64 5
-  store float %30, float* %25, align 4, !tbaa !2
-  %32 = getelementptr inbounds float, float* %1, i64 8
-  %33 = load float, float* %32, align 4, !tbaa !2
-  %34 = getelementptr inbounds float, float* %1, i64 30
-  %35 = load float, float* %34, align 4, !tbaa !2
+  %31 = getelementptr inbounds float, ptr %0, i64 5
+  store float %30, ptr %25, align 4, !tbaa !2
+  %32 = getelementptr inbounds float, ptr %1, i64 8
+  %33 = load float, ptr %32, align 4, !tbaa !2
+  %34 = getelementptr inbounds float, ptr %1, i64 30
+  %35 = load float, ptr %34, align 4, !tbaa !2
   %36 = fdiv float %33, %35
-  %37 = getelementptr inbounds float, float* %0, i64 6
-  store float %36, float* %31, align 4, !tbaa !2
-  %38 = getelementptr inbounds float, float* %1, i64 5
-  %39 = load float, float* %38, align 4, !tbaa !2
-  %40 = getelementptr inbounds float, float* %1, i64 27
-  %41 = load float, float* %40, align 4, !tbaa !2
+  %37 = getelementptr inbounds float, ptr %0, i64 6
+  store float %36, ptr %31, align 4, !tbaa !2
+  %38 = getelementptr inbounds float, ptr %1, i64 5
+  %39 = load float, ptr %38, align 4, !tbaa !2
+  %40 = getelementptr inbounds float, ptr %1, i64 27
+  %41 = load float, ptr %40, align 4, !tbaa !2
   %42 = fdiv float %39, %41
-  %43 = getelementptr inbounds float, float* %0, i64 7
-  store float %42, float* %37, align 4, !tbaa !2
-  %44 = getelementptr inbounds float, float* %1, i64 20
-  %45 = load float, float* %44, align 4, !tbaa !2
-  %46 = getelementptr inbounds float, float* %1, i64 23
-  %47 = load float, float* %46, align 4, !tbaa !2
+  %43 = getelementptr inbounds float, ptr %0, i64 7
+  store float %42, ptr %37, align 4, !tbaa !2
+  %44 = getelementptr inbounds float, ptr %1, i64 20
+  %45 = load float, ptr %44, align 4, !tbaa !2
+  %46 = getelementptr inbounds float, ptr %1, i64 23
+  %47 = load float, ptr %46, align 4, !tbaa !2
   %48 = fdiv float %45, %47
-  store float %48, float* %43, align 4, !tbaa !2
+  store float %48, ptr %43, align 4, !tbaa !2
   ret void
 }
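
A note on the hunk above, since the intrinsic rename is easy to miss: masked
gather intrinsics are mangled on their pointer operand type, and with opaque
pointers every address-space-0 pointer mangles as plain "p0". That is why
@llvm.masked.gather.v8f32.v8p0f32 over <8 x float*> becomes
@llvm.masked.gather.v8f32.v8p0 over <8 x ptr>. A minimal standalone sketch of
the converted form; the function name and operands here are illustrative and
not taken from the test:

declare <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr>, i32 immarg, <8 x i1>, <8 x float>)

define <8 x float> @gather_sketch(<8 x ptr> %ptrs, <8 x i1> %mask) {
  ; the alignment, mask and passthru operands are unchanged by the conversion;
  ; only the name mangling and the pointer type spelling differ
  %g = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %mask, <8 x float> poison)
  ret <8 x float> %g
}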
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll
index 845122293feea..b1bc5d14550ef 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll
@@ -1,71 +1,69 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-- -mcpu=skylake-avx512 | FileCheck %s
 
-define void @foo(i8* noalias nocapture %t0, i8* noalias nocapture readonly %t1) {
+define void @foo(ptr noalias nocapture %t0, ptr noalias nocapture readonly %t1) {
 ; CHECK-LABEL: @foo(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[T1:%.*]] to <8 x i8>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[T1:%.*]], align 1, !tbaa [[TBAA0:![0-9]+]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult <8 x i8> [[TMP2]], <i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64>
 ; CHECK-NEXT:    [[TMP4:%.*]] = sub <8 x i8> zeroinitializer, [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = select <8 x i1> [[TMP3]], <8 x i8> [[TMP2]], <8 x i8> [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[T0:%.*]] to <8 x i8>*
-; CHECK-NEXT:    store <8 x i8> [[TMP5]], <8 x i8>* [[TMP6]], align 1, !tbaa [[TBAA0]]
+; CHECK-NEXT:    store <8 x i8> [[TMP5]], ptr [[T0:%.*]], align 1, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    ret void
 ;
-  %t3 = load i8, i8* %t1, align 1, !tbaa !3
+  %t3 = load i8, ptr %t1, align 1, !tbaa !3
   %t4 = icmp ult i8 %t3, 64
   %t5 = sub i8 0, %t3
   %t6 = select i1 %t4, i8 %t3, i8 %t5
-  store i8 %t6, i8* %t0, align 1, !tbaa !3
-  %t7 = getelementptr inbounds i8, i8* %t1, i64 1
-  %t8 = load i8, i8* %t7, align 1, !tbaa !3
+  store i8 %t6, ptr %t0, align 1, !tbaa !3
+  %t7 = getelementptr inbounds i8, ptr %t1, i64 1
+  %t8 = load i8, ptr %t7, align 1, !tbaa !3
   %t9 = icmp ult i8 %t8, 64
   %t10 = sub i8 0, %t8
   %t11 = select i1 %t9, i8 %t8, i8 %t10
-  %t12 = getelementptr inbounds i8, i8* %t0, i64 1
-  store i8 %t11, i8* %t12, align 1, !tbaa !3
-  %t13 = getelementptr inbounds i8, i8* %t1, i64 2
-  %t14 = load i8, i8* %t13, align 1, !tbaa !3
+  %t12 = getelementptr inbounds i8, ptr %t0, i64 1
+  store i8 %t11, ptr %t12, align 1, !tbaa !3
+  %t13 = getelementptr inbounds i8, ptr %t1, i64 2
+  %t14 = load i8, ptr %t13, align 1, !tbaa !3
   %t15 = icmp ult i8 %t14, 64
   %t16 = sub i8 0, %t14
   %t17 = select i1 %t15, i8 %t14, i8 %t16
-  %t18 = getelementptr inbounds i8, i8* %t0, i64 2
-  store i8 %t17, i8* %t18, align 1, !tbaa !3
-  %t19 = getelementptr inbounds i8, i8* %t1, i64 3
-  %t20 = load i8, i8* %t19, align 1, !tbaa !3
+  %t18 = getelementptr inbounds i8, ptr %t0, i64 2
+  store i8 %t17, ptr %t18, align 1, !tbaa !3
+  %t19 = getelementptr inbounds i8, ptr %t1, i64 3
+  %t20 = load i8, ptr %t19, align 1, !tbaa !3
   %t21 = icmp ult i8 %t20, 64
   %t22 = sub i8 0, %t20
   %t23 = select i1 %t21, i8 %t20, i8 %t22
-  %t24 = getelementptr inbounds i8, i8* %t0, i64 3
-  store i8 %t23, i8* %t24, align 1, !tbaa !3
-  %t25 = getelementptr inbounds i8, i8* %t1, i64 4
-  %t26 = load i8, i8* %t25, align 1, !tbaa !3
+  %t24 = getelementptr inbounds i8, ptr %t0, i64 3
+  store i8 %t23, ptr %t24, align 1, !tbaa !3
+  %t25 = getelementptr inbounds i8, ptr %t1, i64 4
+  %t26 = load i8, ptr %t25, align 1, !tbaa !3
   %t27 = icmp ult i8 %t26, 64
   %t28 = sub i8 0, %t26
   %t29 = select i1 %t27, i8 %t26, i8 %t28
-  %t30 = getelementptr inbounds i8, i8* %t0, i64 4
-  store i8 %t29, i8* %t30, align 1, !tbaa !3
-  %t31 = getelementptr inbounds i8, i8* %t1, i64 5
-  %t32 = load i8, i8* %t31, align 1, !tbaa !3
+  %t30 = getelementptr inbounds i8, ptr %t0, i64 4
+  store i8 %t29, ptr %t30, align 1, !tbaa !3
+  %t31 = getelementptr inbounds i8, ptr %t1, i64 5
+  %t32 = load i8, ptr %t31, align 1, !tbaa !3
   %t33 = icmp ult i8 %t32, 64
   %t34 = sub i8 0, %t32
   %t35 = select i1 %t33, i8 %t32, i8 %t34
-  %t36 = getelementptr inbounds i8, i8* %t0, i64 5
-  store i8 %t35, i8* %t36, align 1, !tbaa !3
-  %t37 = getelementptr inbounds i8, i8* %t1, i64 6
-  %t38 = load i8, i8* %t37, align 1, !tbaa !3
+  %t36 = getelementptr inbounds i8, ptr %t0, i64 5
+  store i8 %t35, ptr %t36, align 1, !tbaa !3
+  %t37 = getelementptr inbounds i8, ptr %t1, i64 6
+  %t38 = load i8, ptr %t37, align 1, !tbaa !3
   %t39 = icmp ult i8 %t38, 64
   %t40 = sub i8 0, %t38
   %t41 = select i1 %t39, i8 %t38, i8 %t40
-  %t42 = getelementptr inbounds i8, i8* %t0, i64 6
-  store i8 %t41, i8* %t42, align 1, !tbaa !3
-  %t43 = getelementptr inbounds i8, i8* %t1, i64 7
-  %t44 = load i8, i8* %t43, align 1, !tbaa !3
+  %t42 = getelementptr inbounds i8, ptr %t0, i64 6
+  store i8 %t41, ptr %t42, align 1, !tbaa !3
+  %t43 = getelementptr inbounds i8, ptr %t1, i64 7
+  %t44 = load i8, ptr %t43, align 1, !tbaa !3
   %t45 = icmp ult i8 %t44, 64
   %t46 = sub i8 0, %t44
   %t47 = select i1 %t45, i8 %t44, i8 %t46
-  %t48 = getelementptr inbounds i8, i8* %t0, i64 7
-  store i8 %t47, i8* %t48, align 1, !tbaa !3
+  %t48 = getelementptr inbounds i8, ptr %t0, i64 7
+  store i8 %t47, ptr %t48, align 1, !tbaa !3
   ret void
 }
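
A note on why the CHECK lines in this file get shorter: with typed pointers, a
vector access through an i8* needed an explicit "bitcast i8* ... to <8 x i8>*"
before the load or store, whereas with opaque pointers the access type is
carried by the memory instruction itself, so the bitcast disappears entirely.
A minimal sketch, using a hypothetical function not taken from the test:

; typed pointers (before):
;   %p = bitcast i8* %t0 to <8 x i8>*
;   store <8 x i8> %v, <8 x i8>* %p, align 1
define void @store_sketch(ptr %t0, <8 x i8> %v) {
  ; opaque pointers (after): the pointee type lives on the store, not the pointer
  store <8 x i8> %v, ptr %t0, align 1
  ret void
}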
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/pr52275.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr52275.ll
index 6cc5ae7b23de9..3f2211ce31806 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr52275.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr52275.ll
@@ -2,39 +2,39 @@
 ; RUN: opt < %s -passes=slp-vectorizer -S -mcpu=core-i7 | FileCheck %s
 ; RUN: opt < %s -passes=slp-vectorizer -slp-threshold=-100 -S -mcpu=core-i7 | FileCheck %s --check-prefix=FORCE_SLP
 
-define  <4 x i8> @test(<4 x i8> %v, i8* %x) {
+define  <4 x i8> @test(<4 x i8> %v, ptr %x) {
 ; CHECK-LABEL: @test(
-; CHECK-NEXT:    [[X0:%.*]] = load i8, i8* [[X:%.*]], align 4
-; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds i8, i8* [[X]], i64 1
-; CHECK-NEXT:    [[X1:%.*]] = load i8, i8* [[G1]], align 4
+; CHECK-NEXT:    [[X0:%.*]] = load i8, ptr [[X:%.*]], align 4
+; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 1
+; CHECK-NEXT:    [[X1:%.*]] = load i8, ptr [[G1]], align 4
 ; CHECK-NEXT:    [[V0:%.*]] = insertelement <4 x i8> [[V:%.*]], i8 [[X0]], i64 0
 ; CHECK-NEXT:    [[V1:%.*]] = insertelement <4 x i8> [[V0]], i8 [[X1]], i64 1
 ; CHECK-NEXT:    [[V2:%.*]] = add <4 x i8> [[V0]], [[V1]]
 ; CHECK-NEXT:    ret <4 x i8> [[V2]]
 ;
 ; FORCE_SLP-LABEL: @test(
-; FORCE_SLP-NEXT:    [[X0:%.*]] = load i8, i8* [[X:%.*]], align 4
-; FORCE_SLP-NEXT:    [[G1:%.*]] = getelementptr inbounds i8, i8* [[X]], i64 1
-; FORCE_SLP-NEXT:    [[X1:%.*]] = load i8, i8* [[G1]], align 4
+; FORCE_SLP-NEXT:    [[X0:%.*]] = load i8, ptr [[X:%.*]], align 4
+; FORCE_SLP-NEXT:    [[G1:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 1
+; FORCE_SLP-NEXT:    [[X1:%.*]] = load i8, ptr [[G1]], align 4
 ; FORCE_SLP-NEXT:    [[V0:%.*]] = insertelement <4 x i8> [[V:%.*]], i8 [[X0]], i64 0
 ; FORCE_SLP-NEXT:    [[V1:%.*]] = insertelement <4 x i8> [[V0]], i8 [[X1]], i64 1
 ; FORCE_SLP-NEXT:    [[V2:%.*]] = add <4 x i8> [[V0]], [[V1]]
 ; FORCE_SLP-NEXT:    ret <4 x i8> [[V2]]
 ;
-  %x0 = load i8, i8* %x, align 4
-  %g1 = getelementptr inbounds i8, i8* %x, i64 1
-  %x1 = load i8, i8* %g1, align 4
+  %x0 = load i8, ptr %x, align 4
+  %g1 = getelementptr inbounds i8, ptr %x, i64 1
+  %x1 = load i8, ptr %g1, align 4
   %v0 = insertelement <4 x i8> %v, i8 %x0, i64 0
   %v1 = insertelement <4 x i8> %v0, i8 %x1, i64 1
   %v2 = add <4 x i8> %v0, %v1
   ret <4 x i8> %v2
 }
 
-define  <2 x i8> @test2(<2 x i8> %t6, i32* %t1) {
+define  <2 x i8> @test2(<2 x i8> %t6, ptr %t1) {
 ; CHECK-LABEL: @test2(
-; CHECK-NEXT:    [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
-; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
-; CHECK-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
+; CHECK-NEXT:    [[T3:%.*]] = load i32, ptr [[T1:%.*]], align 4
+; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 1
+; CHECK-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
 ; CHECK-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
 ; CHECK-NEXT:    [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 0
 ; CHECK-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
@@ -43,9 +43,9 @@ define  <2 x i8> @test2(<2 x i8> %t6, i32* %t1) {
 ; CHECK-NEXT:    ret <2 x i8> [[T11]]
 ;
 ; FORCE_SLP-LABEL: @test2(
-; FORCE_SLP-NEXT:    [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
-; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
-; FORCE_SLP-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
+; FORCE_SLP-NEXT:    [[T3:%.*]] = load i32, ptr [[T1:%.*]], align 4
+; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 1
+; FORCE_SLP-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
 ; FORCE_SLP-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
 ; FORCE_SLP-NEXT:    [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 0
 ; FORCE_SLP-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
@@ -53,9 +53,9 @@ define  <2 x i8> @test2(<2 x i8> %t6, i32* %t1) {
 ; FORCE_SLP-NEXT:    [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
 ; FORCE_SLP-NEXT:    ret <2 x i8> [[T11]]
 ;
-  %t3 = load i32, i32* %t1, align 4
-  %t4 = getelementptr inbounds i32, i32* %t1, i64 1
-  %t5 = load i32, i32* %t4, align 4
+  %t3 = load i32, ptr %t1, align 4
+  %t4 = getelementptr inbounds i32, ptr %t1, i64 1
+  %t5 = load i32, ptr %t4, align 4
   %t7 = trunc i32 %t3 to i8
   %t8 = insertelement <2 x i8> %t6, i8 %t7, i64 0
   %t9 = trunc i32 %t5 to i8
@@ -64,11 +64,11 @@ define  <2 x i8> @test2(<2 x i8> %t6, i32* %t1) {
   ret <2 x i8> %t11
 }
 
-define  <2 x i8> @test_reorder(<2 x i8> %t6, i32* %t1) {
+define  <2 x i8> @test_reorder(<2 x i8> %t6, ptr %t1) {
 ; CHECK-LABEL: @test_reorder(
-; CHECK-NEXT:    [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
-; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
-; CHECK-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
+; CHECK-NEXT:    [[T3:%.*]] = load i32, ptr [[T1:%.*]], align 4
+; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 1
+; CHECK-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
 ; CHECK-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
 ; CHECK-NEXT:    [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 1
 ; CHECK-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
@@ -77,9 +77,9 @@ define  <2 x i8> @test_reorder(<2 x i8> %t6, i32* %t1) {
 ; CHECK-NEXT:    ret <2 x i8> [[T11]]
 ;
 ; FORCE_SLP-LABEL: @test_reorder(
-; FORCE_SLP-NEXT:    [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
-; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
-; FORCE_SLP-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
+; FORCE_SLP-NEXT:    [[T3:%.*]] = load i32, ptr [[T1:%.*]], align 4
+; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 1
+; FORCE_SLP-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
 ; FORCE_SLP-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
 ; FORCE_SLP-NEXT:    [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 1
 ; FORCE_SLP-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
@@ -87,9 +87,9 @@ define  <2 x i8> @test_reorder(<2 x i8> %t6, i32* %t1) {
 ; FORCE_SLP-NEXT:    [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
 ; FORCE_SLP-NEXT:    ret <2 x i8> [[T11]]
 ;
-  %t3 = load i32, i32* %t1, align 4
-  %t4 = getelementptr inbounds i32, i32* %t1, i64 1
-  %t5 = load i32, i32* %t4, align 4
+  %t3 = load i32, ptr %t1, align 4
+  %t4 = getelementptr inbounds i32, ptr %t1, i64 1
+  %t5 = load i32, ptr %t4, align 4
   %t7 = trunc i32 %t3 to i8
   %t8 = insertelement <2 x i8> %t6, i8 %t7, i64 1
   %t9 = trunc i32 %t5 to i8
@@ -98,11 +98,11 @@ define  <2 x i8> @test_reorder(<2 x i8> %t6, i32* %t1) {
   ret <2 x i8> %t11
 }
 
-define  <4 x i8> @test_subvector(<4 x i8> %t6, i32* %t1) {
+define  <4 x i8> @test_subvector(<4 x i8> %t6, ptr %t1) {
 ; CHECK-LABEL: @test_subvector(
-; CHECK-NEXT:    [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
-; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
-; CHECK-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
+; CHECK-NEXT:    [[T3:%.*]] = load i32, ptr [[T1:%.*]], align 4
+; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 1
+; CHECK-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
 ; CHECK-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
 ; CHECK-NEXT:    [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 0
 ; CHECK-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
@@ -111,9 +111,9 @@ define  <4 x i8> @test_subvector(<4 x i8> %t6, i32* %t1) {
 ; CHECK-NEXT:    ret <4 x i8> [[T11]]
 ;
 ; FORCE_SLP-LABEL: @test_subvector(
-; FORCE_SLP-NEXT:    [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
-; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
-; FORCE_SLP-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
+; FORCE_SLP-NEXT:    [[T3:%.*]] = load i32, ptr [[T1:%.*]], align 4
+; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 1
+; FORCE_SLP-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
 ; FORCE_SLP-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
 ; FORCE_SLP-NEXT:    [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 0
 ; FORCE_SLP-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
@@ -121,9 +121,9 @@ define  <4 x i8> @test_subvector(<4 x i8> %t6, i32* %t1) {
 ; FORCE_SLP-NEXT:    [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
 ; FORCE_SLP-NEXT:    ret <4 x i8> [[T11]]
 ;
-  %t3 = load i32, i32* %t1, align 4
-  %t4 = getelementptr inbounds i32, i32* %t1, i64 1
-  %t5 = load i32, i32* %t4, align 4
+  %t3 = load i32, ptr %t1, align 4
+  %t4 = getelementptr inbounds i32, ptr %t1, i64 1
+  %t5 = load i32, ptr %t4, align 4
   %t7 = trunc i32 %t3 to i8
   %t8 = insertelement <4 x i8> %t6, i8 %t7, i64 0
   %t9 = trunc i32 %t5 to i8
@@ -132,11 +132,11 @@ define  <4 x i8> @test_subvector(<4 x i8> %t6, i32* %t1) {
   ret <4 x i8> %t11
 }
 
-define  <4 x i8> @test_subvector_reorder(<4 x i8> %t6, i32* %t1) {
+define  <4 x i8> @test_subvector_reorder(<4 x i8> %t6, ptr %t1) {
 ; CHECK-LABEL: @test_subvector_reorder(
-; CHECK-NEXT:    [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
-; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
-; CHECK-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
+; CHECK-NEXT:    [[T3:%.*]] = load i32, ptr [[T1:%.*]], align 4
+; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 1
+; CHECK-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
 ; CHECK-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
 ; CHECK-NEXT:    [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 3
 ; CHECK-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
@@ -145,9 +145,9 @@ define  <4 x i8> @test_subvector_reorder(<4 x i8> %t6, i32* %t1) {
 ; CHECK-NEXT:    ret <4 x i8> [[T11]]
 ;
 ; FORCE_SLP-LABEL: @test_subvector_reorder(
-; FORCE_SLP-NEXT:    [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
-; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
-; FORCE_SLP-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
+; FORCE_SLP-NEXT:    [[T3:%.*]] = load i32, ptr [[T1:%.*]], align 4
+; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 1
+; FORCE_SLP-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
 ; FORCE_SLP-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
 ; FORCE_SLP-NEXT:    [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 3
 ; FORCE_SLP-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
@@ -155,9 +155,9 @@ define  <4 x i8> @test_subvector_reorder(<4 x i8> %t6, i32* %t1) {
 ; FORCE_SLP-NEXT:    [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
 ; FORCE_SLP-NEXT:    ret <4 x i8> [[T11]]
 ;
-  %t3 = load i32, i32* %t1, align 4
-  %t4 = getelementptr inbounds i32, i32* %t1, i64 1
-  %t5 = load i32, i32* %t4, align 4
+  %t3 = load i32, ptr %t1, align 4
+  %t4 = getelementptr inbounds i32, ptr %t1, i64 1
+  %t5 = load i32, ptr %t4, align 4
   %t7 = trunc i32 %t3 to i8
   %t8 = insertelement <4 x i8> %t6, i8 %t7, i64 3
   %t9 = trunc i32 %t5 to i8
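
A note on the getelementptr lines above: GEP already carries an explicit
source element type ("getelementptr inbounds i32, ..."), so the conversion
only respells the pointer operand from i32* to ptr; the i32 stride and the
computed offsets are untouched. An illustrative sketch with a hypothetical
function name:

define ptr @gep_sketch(ptr %base) {
  ; still advances by one i32 (4 bytes), exactly as the typed-pointer form did
  %g = getelementptr inbounds i32, ptr %base, i64 1
  ret ptr %g
}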

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/pr54465.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr54465.ll
index 40ce18bbe72e2..c98db2dcc1f68 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr54465.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr54465.ll
@@ -14,17 +14,17 @@ target triple = "x86_64-unknown-linux"
 define dso_local i32 @main() {
 ; CHECK-LABEL: @main(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load volatile i32, i32* @e, align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @f, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load volatile i32, ptr @e, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @f, align 4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP2:%.*]] = load volatile i32, i32* @d, align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = load volatile i32, i32* @c, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load volatile i32, ptr @d, align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load volatile i32, ptr @c, align 4
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[LOR_RHS:%.*]], label [[LOR_END:%.*]]
 ; CHECK:       lor.rhs:
-; CHECK-NEXT:    [[TMP4:%.*]] = load volatile i32, i32* @d, align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load volatile i32, ptr @d, align 4
 ; CHECK-NEXT:    [[TOBOOL1:%.*]] = icmp ne i32 [[TMP4]], 0
 ; CHECK-NEXT:    [[PHI_CAST:%.*]] = zext i1 [[TOBOOL1]] to i32
 ; CHECK-NEXT:    br label [[LOR_END]]
@@ -33,38 +33,38 @@ define dso_local i32 @main() {
 ; CHECK-NEXT:    [[TOBOOL2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    [[LAND_EXT:%.*]] = zext i1 [[TOBOOL2]] to i32
 ; CHECK-NEXT:    [[OR:%.*]] = or i32 [[TMP5]], [[LAND_EXT]]
-; CHECK-NEXT:    store volatile i32 [[OR]], i32* @b, align 4
+; CHECK-NEXT:    store volatile i32 [[OR]], ptr @b, align 4
 ; CHECK-NEXT:    [[DIV:%.*]] = sdiv i32 [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load volatile i32, i32* @d, align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = load volatile i32, ptr @d, align 4
 ; CHECK-NEXT:    [[TOBOOL4_NOT:%.*]] = icmp eq i32 [[TMP6]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL4_NOT]], label [[LAND_END7:%.*]], label [[LAND_RHS5:%.*]]
 ; CHECK:       land.rhs5:
-; CHECK-NEXT:    [[TMP7:%.*]] = load volatile i32, i32* @c, align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load volatile i32, ptr @c, align 4
 ; CHECK-NEXT:    [[TOBOOL6:%.*]] = icmp ne i32 [[TMP7]], 0
 ; CHECK-NEXT:    [[PHI_CAST11:%.*]] = zext i1 [[TOBOOL6]] to i32
 ; CHECK-NEXT:    br label [[LAND_END7]]
 ; CHECK:       land.end7:
 ; CHECK-NEXT:    [[TMP8:%.*]] = phi i32 [ 0, [[LOR_END]] ], [ [[PHI_CAST11]], [[LAND_RHS5]] ]
 ; CHECK-NEXT:    [[OR9:%.*]] = or i32 [[TMP8]], [[DIV]]
-; CHECK-NEXT:    store i32 [[OR9]], i32* @a, align 4
+; CHECK-NEXT:    store i32 [[OR9]], ptr @a, align 4
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:
-  %0 = load volatile i32, i32* @e, align 4
-  %1 = load i32, i32* @f, align 4
+  %0 = load volatile i32, ptr @e, align 4
+  %1 = load i32, ptr @f, align 4
   %cmp = icmp sgt i32 %0, %1
   br i1 %cmp, label %if.then, label %if.end
 
 if.then:                                          ; preds = %entry
-  %2 = load volatile i32, i32* @d, align 4
-  %3 = load volatile i32, i32* @c, align 4
+  %2 = load volatile i32, ptr @d, align 4
+  %3 = load volatile i32, ptr @c, align 4
   %tobool.not = icmp eq i32 %3, 0
   br i1 %tobool.not, label %lor.rhs, label %lor.end
 
 lor.rhs:                                          ; preds = %if.then
-  %4 = load volatile i32, i32* @d, align 4
+  %4 = load volatile i32, ptr @d, align 4
   %tobool1 = icmp ne i32 %4, 0
   %phi.cast = zext i1 %tobool1 to i32
   br label %lor.end
@@ -74,14 +74,14 @@ lor.end:                                          ; preds = %lor.rhs, %if.then
   %tobool2 = icmp ne i32 %1, 0
   %land.ext = zext i1 %tobool2 to i32
   %or = or i32 %5, %land.ext
-  store volatile i32 %or, i32* @b, align 4
+  store volatile i32 %or, ptr @b, align 4
   %div = sdiv i32 %1, %2
-  %6 = load volatile i32, i32* @d, align 4
+  %6 = load volatile i32, ptr @d, align 4
   %tobool4.not = icmp eq i32 %6, 0
   br i1 %tobool4.not, label %land.end7, label %land.rhs5
 
 land.rhs5:                                        ; preds = %lor.end
-  %7 = load volatile i32, i32* @c, align 4
+  %7 = load volatile i32, ptr @c, align 4
   %tobool6 = icmp ne i32 %7, 0
   %phi.cast11 = zext i1 %tobool6 to i32
   br label %land.end7
@@ -89,7 +89,7 @@ land.rhs5:                                        ; preds = %lor.end
 land.end7:                                        ; preds = %land.rhs5, %lor.end
   %8 = phi i32 [ 0, %lor.end ], [ %phi.cast11, %land.rhs5 ]
   %or9 = or i32 %8, %div
-  store i32 %or9, i32* @a, align 4
+  store i32 %or9, ptr @a, align 4
   br label %if.end
 
 if.end:                                           ; preds = %land.end7, %entry
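
Two notes before the next file. First, in this test the globals themselves
stay typed where they are defined; only their uses change spelling, e.g.
"load volatile i32, i32* @e" becomes "load volatile i32, ptr @e". A minimal
sketch with a hypothetical global:

@g = global i32 0

define i32 @global_sketch() {
  %v = load volatile i32, ptr @g, align 4   ; was: load volatile i32, i32* @g
  ret i32 %v
}

Second, the propagate_ir_flags.ll diff below exercises flag propagation rather
than pointers: judging from its CHECK lines (@not_exact, @not_nsw,
@not_nsw_but_nuw), the vectorized instruction keeps a wrap, exact or fast-math
flag only when every scalar lane carries it. An illustrative two-lane sketch,
not taken from the test:

define <2 x i32> @flags_sketch(<2 x i32> %x) {
  ; nsw is dropped because only some scalar lanes carried it (cf. @not_nsw_but_nuw);
  ; nuw survives because all lanes had it
  %v = add nuw <2 x i32> %x, <i32 1, i32 1>
  ret <2 x i32> %v
}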

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll b/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
index eff23bc7ceb63..6f70aa5f4f6af 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
@@ -8,491 +8,431 @@
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-unknown"
 
-define void @exact(i32* %x) {
+define void @exact(ptr %x) {
 ; CHECK-LABEL: @exact(
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = lshr exact <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
-  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
-  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
-  %idx4 = getelementptr inbounds i32, i32* %x, i64 3
+  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
+  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
+  %idx4 = getelementptr inbounds i32, ptr %x, i64 3
 
-  %load1 = load i32, i32* %idx1, align 4
-  %load2 = load i32, i32* %idx2, align 4
-  %load3 = load i32, i32* %idx3, align 4
-  %load4 = load i32, i32* %idx4, align 4
+  %load1 = load i32, ptr %x, align 4
+  %load2 = load i32, ptr %idx2, align 4
+  %load3 = load i32, ptr %idx3, align 4
+  %load4 = load i32, ptr %idx4, align 4
 
   %op1 = lshr exact i32 %load1, 1
   %op2 = lshr exact i32 %load2, 1
   %op3 = lshr exact i32 %load3, 1
   %op4 = lshr exact i32 %load4, 1
 
-  store i32 %op1, i32* %idx1, align 4
-  store i32 %op2, i32* %idx2, align 4
-  store i32 %op3, i32* %idx3, align 4
-  store i32 %op4, i32* %idx4, align 4
+  store i32 %op1, ptr %x, align 4
+  store i32 %op2, ptr %idx2, align 4
+  store i32 %op3, ptr %idx3, align 4
+  store i32 %op4, ptr %idx4, align 4
 
   ret void
 }
 
-define void @not_exact(i32* %x) {
+define void @not_exact(ptr %x) {
 ; CHECK-LABEL: @not_exact(
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
-  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
-  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
-  %idx4 = getelementptr inbounds i32, i32* %x, i64 3
+  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
+  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
+  %idx4 = getelementptr inbounds i32, ptr %x, i64 3
 
-  %load1 = load i32, i32* %idx1, align 4
-  %load2 = load i32, i32* %idx2, align 4
-  %load3 = load i32, i32* %idx3, align 4
-  %load4 = load i32, i32* %idx4, align 4
+  %load1 = load i32, ptr %x, align 4
+  %load2 = load i32, ptr %idx2, align 4
+  %load3 = load i32, ptr %idx3, align 4
+  %load4 = load i32, ptr %idx4, align 4
 
   %op1 = lshr exact i32 %load1, 1
   %op2 = lshr i32 %load2, 1
   %op3 = lshr exact i32 %load3, 1
   %op4 = lshr exact i32 %load4, 1
 
-  store i32 %op1, i32* %idx1, align 4
-  store i32 %op2, i32* %idx2, align 4
-  store i32 %op3, i32* %idx3, align 4
-  store i32 %op4, i32* %idx4, align 4
+  store i32 %op1, ptr %x, align 4
+  store i32 %op2, ptr %idx2, align 4
+  store i32 %op3, ptr %idx3, align 4
+  store i32 %op4, ptr %idx4, align 4
 
   ret void
 }
 
-define void @nsw(i32* %x) {
+define void @nsw(ptr %x) {
 ; CHECK-LABEL: @nsw(
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
-  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
-  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
-  %idx4 = getelementptr inbounds i32, i32* %x, i64 3
+  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
+  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
+  %idx4 = getelementptr inbounds i32, ptr %x, i64 3
 
-  %load1 = load i32, i32* %idx1, align 4
-  %load2 = load i32, i32* %idx2, align 4
-  %load3 = load i32, i32* %idx3, align 4
-  %load4 = load i32, i32* %idx4, align 4
+  %load1 = load i32, ptr %x, align 4
+  %load2 = load i32, ptr %idx2, align 4
+  %load3 = load i32, ptr %idx3, align 4
+  %load4 = load i32, ptr %idx4, align 4
 
   %op1 = add nsw i32 %load1, 1
   %op2 = add nsw i32 %load2, 1
   %op3 = add nsw i32 %load3, 1
   %op4 = add nsw i32 %load4, 1
 
-  store i32 %op1, i32* %idx1, align 4
-  store i32 %op2, i32* %idx2, align 4
-  store i32 %op3, i32* %idx3, align 4
-  store i32 %op4, i32* %idx4, align 4
+  store i32 %op1, ptr %x, align 4
+  store i32 %op2, ptr %idx2, align 4
+  store i32 %op3, ptr %idx3, align 4
+  store i32 %op4, ptr %idx4, align 4
 
   ret void
 }
 
-define void @not_nsw(i32* %x) {
+define void @not_nsw(ptr %x) {
 ; CHECK-LABEL: @not_nsw(
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
-  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
-  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
-  %idx4 = getelementptr inbounds i32, i32* %x, i64 3
+  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
+  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
+  %idx4 = getelementptr inbounds i32, ptr %x, i64 3
 
-  %load1 = load i32, i32* %idx1, align 4
-  %load2 = load i32, i32* %idx2, align 4
-  %load3 = load i32, i32* %idx3, align 4
-  %load4 = load i32, i32* %idx4, align 4
+  %load1 = load i32, ptr %x, align 4
+  %load2 = load i32, ptr %idx2, align 4
+  %load3 = load i32, ptr %idx3, align 4
+  %load4 = load i32, ptr %idx4, align 4
 
   %op1 = add nsw i32 %load1, 1
   %op2 = add nsw i32 %load2, 1
   %op3 = add nsw i32 %load3, 1
   %op4 = add i32 %load4, 1
 
-  store i32 %op1, i32* %idx1, align 4
-  store i32 %op2, i32* %idx2, align 4
-  store i32 %op3, i32* %idx3, align 4
-  store i32 %op4, i32* %idx4, align 4
+  store i32 %op1, ptr %x, align 4
+  store i32 %op2, ptr %idx2, align 4
+  store i32 %op3, ptr %idx3, align 4
+  store i32 %op4, ptr %idx4, align 4
 
   ret void
 }
 
-define void @nuw(i32* %x) {
+define void @nuw(ptr %x) {
 ; CHECK-LABEL: @nuw(
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
-  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
-  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
-  %idx4 = getelementptr inbounds i32, i32* %x, i64 3
+  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
+  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
+  %idx4 = getelementptr inbounds i32, ptr %x, i64 3
 
-  %load1 = load i32, i32* %idx1, align 4
-  %load2 = load i32, i32* %idx2, align 4
-  %load3 = load i32, i32* %idx3, align 4
-  %load4 = load i32, i32* %idx4, align 4
+  %load1 = load i32, ptr %x, align 4
+  %load2 = load i32, ptr %idx2, align 4
+  %load3 = load i32, ptr %idx3, align 4
+  %load4 = load i32, ptr %idx4, align 4
 
   %op1 = add nuw i32 %load1, 1
   %op2 = add nuw i32 %load2, 1
   %op3 = add nuw i32 %load3, 1
   %op4 = add nuw i32 %load4, 1
 
-  store i32 %op1, i32* %idx1, align 4
-  store i32 %op2, i32* %idx2, align 4
-  store i32 %op3, i32* %idx3, align 4
-  store i32 %op4, i32* %idx4, align 4
+  store i32 %op1, ptr %x, align 4
+  store i32 %op2, ptr %idx2, align 4
+  store i32 %op3, ptr %idx3, align 4
+  store i32 %op4, ptr %idx4, align 4
 
   ret void
 }
 
-define void @not_nuw(i32* %x) {
+define void @not_nuw(ptr %x) {
 ; CHECK-LABEL: @not_nuw(
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
-  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
-  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
-  %idx4 = getelementptr inbounds i32, i32* %x, i64 3
+  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
+  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
+  %idx4 = getelementptr inbounds i32, ptr %x, i64 3
 
-  %load1 = load i32, i32* %idx1, align 4
-  %load2 = load i32, i32* %idx2, align 4
-  %load3 = load i32, i32* %idx3, align 4
-  %load4 = load i32, i32* %idx4, align 4
+  %load1 = load i32, ptr %x, align 4
+  %load2 = load i32, ptr %idx2, align 4
+  %load3 = load i32, ptr %idx3, align 4
+  %load4 = load i32, ptr %idx4, align 4
 
   %op1 = add nuw i32 %load1, 1
   %op2 = add i32 %load2, 1
   %op3 = add i32 %load3, 1
   %op4 = add nuw i32 %load4, 1
 
-  store i32 %op1, i32* %idx1, align 4
-  store i32 %op2, i32* %idx2, align 4
-  store i32 %op3, i32* %idx3, align 4
-  store i32 %op4, i32* %idx4, align 4
+  store i32 %op1, ptr %x, align 4
+  store i32 %op2, ptr %idx2, align 4
+  store i32 %op3, ptr %idx3, align 4
+  store i32 %op4, ptr %idx4, align 4
 
   ret void
 }
 
-define void @not_nsw_but_nuw(i32* %x) {
+define void @not_nsw_but_nuw(ptr %x) {
 ; CHECK-LABEL: @not_nsw_but_nuw(
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
-  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
-  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
-  %idx4 = getelementptr inbounds i32, i32* %x, i64 3
+  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
+  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
+  %idx4 = getelementptr inbounds i32, ptr %x, i64 3
 
-  %load1 = load i32, i32* %idx1, align 4
-  %load2 = load i32, i32* %idx2, align 4
-  %load3 = load i32, i32* %idx3, align 4
-  %load4 = load i32, i32* %idx4, align 4
+  %load1 = load i32, ptr %x, align 4
+  %load2 = load i32, ptr %idx2, align 4
+  %load3 = load i32, ptr %idx3, align 4
+  %load4 = load i32, ptr %idx4, align 4
 
   %op1 = add nuw i32 %load1, 1
   %op2 = add nuw nsw i32 %load2, 1
   %op3 = add nuw nsw i32 %load3, 1
   %op4 = add nuw i32 %load4, 1
 
-  store i32 %op1, i32* %idx1, align 4
-  store i32 %op2, i32* %idx2, align 4
-  store i32 %op3, i32* %idx3, align 4
-  store i32 %op4, i32* %idx4, align 4
+  store i32 %op1, ptr %x, align 4
+  store i32 %op2, ptr %idx2, align 4
+  store i32 %op3, ptr %idx3, align 4
+  store i32 %op4, ptr %idx4, align 4
 
   ret void
 }
 
-define void @nnan(float* %x) {
+define void @nnan(ptr %x) {
 ; CHECK-LABEL: @nnan(
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd nnan <4 x float> [[TMP2]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %idx1 = getelementptr inbounds float, float* %x, i64 0
-  %idx2 = getelementptr inbounds float, float* %x, i64 1
-  %idx3 = getelementptr inbounds float, float* %x, i64 2
-  %idx4 = getelementptr inbounds float, float* %x, i64 3
+  %idx2 = getelementptr inbounds float, ptr %x, i64 1
+  %idx3 = getelementptr inbounds float, ptr %x, i64 2
+  %idx4 = getelementptr inbounds float, ptr %x, i64 3
 
-  %load1 = load float, float* %idx1, align 4
-  %load2 = load float, float* %idx2, align 4
-  %load3 = load float, float* %idx3, align 4
-  %load4 = load float, float* %idx4, align 4
+  %load1 = load float, ptr %x, align 4
+  %load2 = load float, ptr %idx2, align 4
+  %load3 = load float, ptr %idx3, align 4
+  %load4 = load float, ptr %idx4, align 4
 
   %op1 = fadd fast nnan float %load1, 1.0
   %op2 = fadd nnan ninf float %load2, 1.0
   %op3 = fadd nsz nnan float %load3, 1.0
   %op4 = fadd arcp nnan float %load4, 1.0
 
-  store float %op1, float* %idx1, align 4
-  store float %op2, float* %idx2, align 4
-  store float %op3, float* %idx3, align 4
-  store float %op4, float* %idx4, align 4
+  store float %op1, ptr %x, align 4
+  store float %op2, ptr %idx2, align 4
+  store float %op3, ptr %idx3, align 4
+  store float %op4, ptr %idx4, align 4
 
   ret void
 }
 
-define void @not_nnan(float* %x) {
+define void @not_nnan(ptr %x) {
 ; CHECK-LABEL: @not_nnan(
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[TMP2]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %idx1 = getelementptr inbounds float, float* %x, i64 0
-  %idx2 = getelementptr inbounds float, float* %x, i64 1
-  %idx3 = getelementptr inbounds float, float* %x, i64 2
-  %idx4 = getelementptr inbounds float, float* %x, i64 3
+  %idx2 = getelementptr inbounds float, ptr %x, i64 1
+  %idx3 = getelementptr inbounds float, ptr %x, i64 2
+  %idx4 = getelementptr inbounds float, ptr %x, i64 3
 
-  %load1 = load float, float* %idx1, align 4
-  %load2 = load float, float* %idx2, align 4
-  %load3 = load float, float* %idx3, align 4
-  %load4 = load float, float* %idx4, align 4
+  %load1 = load float, ptr %x, align 4
+  %load2 = load float, ptr %idx2, align 4
+  %load3 = load float, ptr %idx3, align 4
+  %load4 = load float, ptr %idx4, align 4
 
   %op1 = fadd nnan float %load1, 1.0
   %op2 = fadd ninf float %load2, 1.0
   %op3 = fadd nsz float %load3, 1.0
   %op4 = fadd arcp float %load4, 1.0
 
-  store float %op1, float* %idx1, align 4
-  store float %op2, float* %idx2, align 4
-  store float %op3, float* %idx3, align 4
-  store float %op4, float* %idx4, align 4
+  store float %op1, ptr %x, align 4
+  store float %op2, ptr %idx2, align 4
+  store float %op3, ptr %idx3, align 4
+  store float %op4, ptr %idx4, align 4
 
   ret void
 }
 
-define void @only_fast(float* %x) {
+define void @only_fast(ptr %x) {
 ; CHECK-LABEL: @only_fast(
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %idx1 = getelementptr inbounds float, float* %x, i64 0
-  %idx2 = getelementptr inbounds float, float* %x, i64 1
-  %idx3 = getelementptr inbounds float, float* %x, i64 2
-  %idx4 = getelementptr inbounds float, float* %x, i64 3
+  %idx2 = getelementptr inbounds float, ptr %x, i64 1
+  %idx3 = getelementptr inbounds float, ptr %x, i64 2
+  %idx4 = getelementptr inbounds float, ptr %x, i64 3
 
-  %load1 = load float, float* %idx1, align 4
-  %load2 = load float, float* %idx2, align 4
-  %load3 = load float, float* %idx3, align 4
-  %load4 = load float, float* %idx4, align 4
+  %load1 = load float, ptr %x, align 4
+  %load2 = load float, ptr %idx2, align 4
+  %load3 = load float, ptr %idx3, align 4
+  %load4 = load float, ptr %idx4, align 4
 
   %op1 = fadd fast nnan float %load1, 1.0
   %op2 = fadd fast nnan ninf float %load2, 1.0
   %op3 = fadd fast nsz nnan float %load3, 1.0
   %op4 = fadd arcp nnan fast float %load4, 1.0
 
-  store float %op1, float* %idx1, align 4
-  store float %op2, float* %idx2, align 4
-  store float %op3, float* %idx3, align 4
-  store float %op4, float* %idx4, align 4
+  store float %op1, ptr %x, align 4
+  store float %op2, ptr %idx2, align 4
+  store float %op3, ptr %idx3, align 4
+  store float %op4, ptr %idx4, align 4
 
   ret void
 }
 
-define void @only_arcp(float* %x) {
+define void @only_arcp(ptr %x) {
 ; CHECK-LABEL: @only_arcp(
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd arcp <4 x float> [[TMP2]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %idx1 = getelementptr inbounds float, float* %x, i64 0
-  %idx2 = getelementptr inbounds float, float* %x, i64 1
-  %idx3 = getelementptr inbounds float, float* %x, i64 2
-  %idx4 = getelementptr inbounds float, float* %x, i64 3
+  %idx2 = getelementptr inbounds float, ptr %x, i64 1
+  %idx3 = getelementptr inbounds float, ptr %x, i64 2
+  %idx4 = getelementptr inbounds float, ptr %x, i64 3
 
-  %load1 = load float, float* %idx1, align 4
-  %load2 = load float, float* %idx2, align 4
-  %load3 = load float, float* %idx3, align 4
-  %load4 = load float, float* %idx4, align 4
+  %load1 = load float, ptr %x, align 4
+  %load2 = load float, ptr %idx2, align 4
+  %load3 = load float, ptr %idx3, align 4
+  %load4 = load float, ptr %idx4, align 4
 
   %op1 = fadd fast float %load1, 1.0
   %op2 = fadd fast float %load2, 1.0
   %op3 = fadd fast float %load3, 1.0
   %op4 = fadd arcp float %load4, 1.0
 
-  store float %op1, float* %idx1, align 4
-  store float %op2, float* %idx2, align 4
-  store float %op3, float* %idx3, align 4
-  store float %op4, float* %idx4, align 4
+  store float %op1, ptr %x, align 4
+  store float %op2, ptr %idx2, align 4
+  store float %op3, ptr %idx3, align 4
+  store float %op4, ptr %idx4, align 4
 
   ret void
 }
 
-define void @addsub_all_nsw(i32* %x) {
+define void @addsub_all_nsw(ptr %x) {
 ; CHECK-LABEL: @addsub_all_nsw(
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
-  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
-  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
-  %idx4 = getelementptr inbounds i32, i32* %x, i64 3
+  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
+  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
+  %idx4 = getelementptr inbounds i32, ptr %x, i64 3
 
-  %load1 = load i32, i32* %idx1, align 4
-  %load2 = load i32, i32* %idx2, align 4
-  %load3 = load i32, i32* %idx3, align 4
-  %load4 = load i32, i32* %idx4, align 4
+  %load1 = load i32, ptr %x, align 4
+  %load2 = load i32, ptr %idx2, align 4
+  %load3 = load i32, ptr %idx3, align 4
+  %load4 = load i32, ptr %idx4, align 4
 
   %op1 = add nsw i32 %load1, 1
   %op2 = sub nsw i32 %load2, 1
   %op3 = add nsw i32 %load3, 1
   %op4 = sub nsw i32 %load4, 1
 
-  store i32 %op1, i32* %idx1, align 4
-  store i32 %op2, i32* %idx2, align 4
-  store i32 %op3, i32* %idx3, align 4
-  store i32 %op4, i32* %idx4, align 4
+  store i32 %op1, ptr %x, align 4
+  store i32 %op2, ptr %idx2, align 4
+  store i32 %op3, ptr %idx3, align 4
+  store i32 %op4, ptr %idx4, align 4
 
   ret void
 }
 
-define void @addsub_some_nsw(i32* %x) {
+define void @addsub_some_nsw(ptr %x) {
 ; CHECK-LABEL: @addsub_some_nsw(
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
-  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
-  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
-  %idx4 = getelementptr inbounds i32, i32* %x, i64 3
+  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
+  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
+  %idx4 = getelementptr inbounds i32, ptr %x, i64 3
 
-  %load1 = load i32, i32* %idx1, align 4
-  %load2 = load i32, i32* %idx2, align 4
-  %load3 = load i32, i32* %idx3, align 4
-  %load4 = load i32, i32* %idx4, align 4
+  %load1 = load i32, ptr %x, align 4
+  %load2 = load i32, ptr %idx2, align 4
+  %load3 = load i32, ptr %idx3, align 4
+  %load4 = load i32, ptr %idx4, align 4
 
   %op1 = add nsw i32 %load1, 1
   %op2 = sub nsw i32 %load2, 1
   %op3 = add nsw i32 %load3, 1
   %op4 = sub i32 %load4, 1
 
-  store i32 %op1, i32* %idx1, align 4
-  store i32 %op2, i32* %idx2, align 4
-  store i32 %op3, i32* %idx3, align 4
-  store i32 %op4, i32* %idx4, align 4
+  store i32 %op1, ptr %x, align 4
+  store i32 %op2, ptr %idx2, align 4
+  store i32 %op3, ptr %idx3, align 4
+  store i32 %op4, ptr %idx4, align 4
 
   ret void
 }
 
-define void @addsub_no_nsw(i32* %x) {
+define void @addsub_no_nsw(ptr %x) {
 ; CHECK-LABEL: @addsub_no_nsw(
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %idx1 = getelementptr inbounds i32, i32* %x, i64 0
-  %idx2 = getelementptr inbounds i32, i32* %x, i64 1
-  %idx3 = getelementptr inbounds i32, i32* %x, i64 2
-  %idx4 = getelementptr inbounds i32, i32* %x, i64 3
+  %idx2 = getelementptr inbounds i32, ptr %x, i64 1
+  %idx3 = getelementptr inbounds i32, ptr %x, i64 2
+  %idx4 = getelementptr inbounds i32, ptr %x, i64 3
 
-  %load1 = load i32, i32* %idx1, align 4
-  %load2 = load i32, i32* %idx2, align 4
-  %load3 = load i32, i32* %idx3, align 4
-  %load4 = load i32, i32* %idx4, align 4
+  %load1 = load i32, ptr %x, align 4
+  %load2 = load i32, ptr %idx2, align 4
+  %load3 = load i32, ptr %idx3, align 4
+  %load4 = load i32, ptr %idx4, align 4
 
   %op1 = add i32 %load1, 1
   %op2 = sub nsw i32 %load2, 1
   %op3 = add nsw i32 %load3, 1
   %op4 = sub i32 %load4, 1
 
-  store i32 %op1, i32* %idx1, align 4
-  store i32 %op2, i32* %idx2, align 4
-  store i32 %op3, i32* %idx3, align 4
-  store i32 %op4, i32* %idx4, align 4
+  store i32 %op1, ptr %x, align 4
+  store i32 %op2, ptr %idx2, align 4
+  store i32 %op3, ptr %idx3, align 4
+  store i32 %op4, ptr %idx4, align 4
 
   ret void
 }
 
-define void @fcmp_fast(double* %x) #1 {
+define void @fcmp_fast(ptr %x) #1 {
 ; CHECK-LABEL: @fcmp_fast(
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast oge <2 x double> [[TMP2]], zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[X]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %idx1 = getelementptr inbounds double, double* %x, i64 0
-  %idx2 = getelementptr inbounds double, double* %x, i64 1
+  %idx2 = getelementptr inbounds double, ptr %x, i64 1
 
-  %load1 = load double, double* %idx1, align 8
-  %load2 = load double, double* %idx2, align 8
+  %load1 = load double, ptr %x, align 8
+  %load2 = load double, ptr %idx2, align 8
 
   %cmp1 = fcmp fast oge double %load1, 0.000000e+00
   %cmp2 = fcmp fast oge double %load2, 0.000000e+00
@@ -503,29 +443,25 @@ define void @fcmp_fast(double* %x) #1 {
   %sel1 = select i1 %cmp1, double %load1, double %sub1
   %sel2 = select i1 %cmp2, double %load2, double %sub2
 
-  store double %sel1, double* %idx1, align 8
-  store double %sel2, double* %idx2, align 8
+  store double %sel1, ptr %x, align 8
+  store double %sel2, ptr %idx2, align 8
 
   ret void
 }
 
-define void @fcmp_fast_unary_fneg(double* %x) #1 {
+define void @fcmp_fast_unary_fneg(ptr %x) #1 {
 ; CHECK-LABEL: @fcmp_fast_unary_fneg(
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast oge <2 x double> [[TMP2]], zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = fneg fast <2 x double> [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[X]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %idx1 = getelementptr inbounds double, double* %x, i64 0
-  %idx2 = getelementptr inbounds double, double* %x, i64 1
+  %idx2 = getelementptr inbounds double, ptr %x, i64 1
 
-  %load1 = load double, double* %idx1, align 8
-  %load2 = load double, double* %idx2, align 8
+  %load1 = load double, ptr %x, align 8
+  %load2 = load double, ptr %idx2, align 8
 
   %cmp1 = fcmp fast oge double %load1, 0.000000e+00
   %cmp2 = fcmp fast oge double %load2, 0.000000e+00
@@ -536,29 +472,25 @@ define void @fcmp_fast_unary_fneg(double* %x) #1 {
   %sel1 = select i1 %cmp1, double %load1, double %sub1
   %sel2 = select i1 %cmp2, double %load2, double %sub2
 
-  store double %sel1, double* %idx1, align 8
-  store double %sel2, double* %idx2, align 8
+  store double %sel1, ptr %x, align 8
+  store double %sel2, ptr %idx2, align 8
 
   ret void
 }
 
-define void @fcmp_no_fast(double* %x) #1 {
+define void @fcmp_no_fast(ptr %x) #1 {
 ; CHECK-LABEL: @fcmp_no_fast(
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp oge <2 x double> [[TMP2]], zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[X]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %idx1 = getelementptr inbounds double, double* %x, i64 0
-  %idx2 = getelementptr inbounds double, double* %x, i64 1
+  %idx2 = getelementptr inbounds double, ptr %x, i64 1
 
-  %load1 = load double, double* %idx1, align 8
-  %load2 = load double, double* %idx2, align 8
+  %load1 = load double, ptr %x, align 8
+  %load2 = load double, ptr %idx2, align 8
 
   %cmp1 = fcmp fast oge double %load1, 0.000000e+00
   %cmp2 = fcmp oge double %load2, 0.000000e+00
@@ -569,29 +501,25 @@ define void @fcmp_no_fast(double* %x) #1 {
   %sel1 = select i1 %cmp1, double %load1, double %sub1
   %sel2 = select i1 %cmp2, double %load2, double %sub2
 
-  store double %sel1, double* %idx1, align 8
-  store double %sel2, double* %idx2, align 8
+  store double %sel1, ptr %x, align 8
+  store double %sel2, ptr %idx2, align 8
 
   ret void
 }
 
-define void @fcmp_no_fast_unary_fneg(double* %x) #1 {
+define void @fcmp_no_fast_unary_fneg(ptr %x) #1 {
 ; CHECK-LABEL: @fcmp_no_fast_unary_fneg(
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp oge <2 x double> [[TMP2]], zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = fneg <2 x double> [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[X]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %idx1 = getelementptr inbounds double, double* %x, i64 0
-  %idx2 = getelementptr inbounds double, double* %x, i64 1
+  %idx2 = getelementptr inbounds double, ptr %x, i64 1
 
-  %load1 = load double, double* %idx1, align 8
-  %load2 = load double, double* %idx2, align 8
+  %load1 = load double, ptr %x, align 8
+  %load2 = load double, ptr %idx2, align 8
 
   %cmp1 = fcmp fast oge double %load1, 0.000000e+00
   %cmp2 = fcmp oge double %load2, 0.000000e+00
@@ -602,60 +530,52 @@ define void @fcmp_no_fast_unary_fneg(double* %x) #1 {
   %sel1 = select i1 %cmp1, double %load1, double %sub1
   %sel2 = select i1 %cmp2, double %load2, double %sub2
 
-  store double %sel1, double* %idx1, align 8
-  store double %sel2, double* %idx2, align 8
+  store double %sel1, ptr %x, align 8
+  store double %sel2, ptr %idx2, align 8
 
   ret void
 }
 
 declare double @llvm.fabs.f64(double) nounwind readnone
 
-define void @call_fast(double* %x) {
+define void @call_fast(ptr %x) {
 ; CHECK-LABEL: @call_fast(
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = call fast <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[X]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %idx1 = getelementptr inbounds double, double* %x, i64 0
-  %idx2 = getelementptr inbounds double, double* %x, i64 1
+  %idx2 = getelementptr inbounds double, ptr %x, i64 1
 
-  %load1 = load double, double* %idx1, align 8
-  %load2 = load double, double* %idx2, align 8
+  %load1 = load double, ptr %x, align 8
+  %load2 = load double, ptr %idx2, align 8
 
   %call1 = tail call fast double @llvm.fabs.f64(double %load1) nounwind readnone
   %call2 = tail call fast double @llvm.fabs.f64(double %load2) nounwind readnone
 
-  store double %call1, double* %idx1, align 8
-  store double %call2, double* %idx2, align 8
+  store double %call1, ptr %x, align 8
+  store double %call2, ptr %idx2, align 8
 
   ret void
 }
 
-define void @call_no_fast(double* %x) {
+define void @call_no_fast(ptr %x) {
 ; CHECK-LABEL: @call_no_fast(
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[X]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %idx1 = getelementptr inbounds double, double* %x, i64 0
-  %idx2 = getelementptr inbounds double, double* %x, i64 1
+  %idx2 = getelementptr inbounds double, ptr %x, i64 1
 
-  %load1 = load double, double* %idx1, align 8
-  %load2 = load double, double* %idx2, align 8
+  %load1 = load double, ptr %x, align 8
+  %load2 = load double, ptr %idx2, align 8
 
   %call1 = tail call fast double @llvm.fabs.f64(double %load1) nounwind readnone
   %call2 = tail call double @llvm.fabs.f64(double %load2) nounwind readnone
 
-  store double %call1, double* %idx1, align 8
-  store double %call2, double* %idx2, align 8
+  store double %call1, ptr %x, align 8
+  store double %call2, ptr %idx2, align 8
 
   ret void
 }

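Each of the hunks above instantiates the same mechanical rewrite, so a compact before/after sketch may help when reading the remaining files (illustrative %names, not taken from any single test):

    ; Typed pointers: the pointee type lives in the pointer type, so a
    ; vector access first needs a bitcast (and often a zero-index GEP).
    %idx = getelementptr inbounds i32, i32* %x, i64 0
    %vp  = bitcast i32* %idx to <4 x i32>*
    %v   = load <4 x i32>, <4 x i32>* %vp, align 4

    ; Opaque pointers: 'ptr' carries no pointee type, so both
    ; instructions fold away and the base pointer is used directly.
    %v   = load <4 x i32>, ptr %x, align 4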
diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll b/llvm/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll
index 182de06319ac7..46ebaa5a69dcf 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reassociated-loads.ll
@@ -1,15 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes=reassociate,slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx -mattr=+avx2 | FileCheck %s
 
-define signext i8 @Foo(<32 x i8>* %__v) {
+define signext i8 @Foo(ptr %__v) {
 ; CHECK-LABEL: @Foo(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <32 x i8>, <32 x i8>* [[__V:%.*]], align 32
+; CHECK-NEXT:    [[TMP0:%.*]] = load <32 x i8>, ptr [[__V:%.*]], align 32
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> [[TMP0]])
 ; CHECK-NEXT:    ret i8 [[TMP1]]
 ;
 entry:
-  %0 = load <32 x i8>, <32 x i8>* %__v, align 32
+  %0 = load <32 x i8>, ptr %__v, align 32
   %vecext.i.i.i = extractelement <32 x i8> %0, i64 0
   %vecext.i.i.1.i = extractelement <32 x i8> %0, i64 1
   %add.i.1.i = add i8 %vecext.i.i.1.i, %vecext.i.i.i

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll
index ea94a95a64f1a..ceee30a3360e2 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll
@@ -4,14 +4,14 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
 target triple = "i386-apple-macosx10.8.0"
 
-; int foo(double *A, int n, int m) {
+; int foo(double *A, int n, int m) {
 ;   double sum = 0, v1 = 2, v0 = 3;
 ;   for (int i=0; i < n; ++i)
 ;     sum += 7*A[i*2] + 7*A[i*2+1];
 ;   return sum;
 ; }
 
-define i32 @reduce(double* nocapture %A, i32 %n, i32 %m) {
+define i32 @reduce(ptr nocapture %A, i32 %n, i32 %m) {
 ; CHECK-LABEL: @reduce(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP13:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -20,9 +20,8 @@ define i32 @reduce(double* nocapture %A, i32 %n, i32 %m) {
 ; CHECK-NEXT:    [[I_015:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[SUM_014:%.*]] = phi double [ [[ADD6:%.*]], [[FOR_BODY]] ], [ 0.000000e+00, [[ENTRY]] ]
 ; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i32 [[I_015]], 1
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i32 [[MUL]]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 [[MUL]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], <double 7.000000e+00, double 7.000000e+00>
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP2]], i32 1
@@ -46,12 +45,12 @@ for.body:                                         ; preds = %entry, %for.body
   %i.015 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %sum.014 = phi double [ %add6, %for.body ], [ 0.000000e+00, %entry ]
   %mul = shl nsw i32 %i.015, 1
-  %arrayidx = getelementptr inbounds double, double* %A, i32 %mul
-  %0 = load double, double* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds double, ptr %A, i32 %mul
+  %0 = load double, ptr %arrayidx, align 4
   %mul1 = fmul double %0, 7.000000e+00
   %add12 = or i32 %mul, 1
-  %arrayidx3 = getelementptr inbounds double, double* %A, i32 %add12
-  %1 = load double, double* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds double, ptr %A, i32 %add12
+  %1 = load double, ptr %arrayidx3, align 4
   %mul4 = fmul double %1, 7.000000e+00
   %add5 = fadd double %mul1, %mul4
   %add6 = fadd double %sum.014, %add5
@@ -71,37 +70,34 @@ for.end:                                          ; preds = %for.cond.for.end_cr
 ; PR43948 - https://bugs.llvm.org/show_bug.cgi?id=43948
 ; The extra use of a non-vectorized element of a reduction must not be killed.
 
-define i32 @horiz_max_multiple_uses([32 x i32]* %x, i32* %p) {
+define i32 @horiz_max_multiple_uses(ptr %x, ptr %p) {
 ; CHECK-LABEL: @horiz_max_multiple_uses(
-; CHECK-NEXT:    [[X0:%.*]] = getelementptr [32 x i32], [32 x i32]* [[X:%.*]], i64 0, i64 0
-; CHECK-NEXT:    [[X4:%.*]] = getelementptr [32 x i32], [32 x i32]* [[X]], i64 0, i64 4
-; CHECK-NEXT:    [[X5:%.*]] = getelementptr [32 x i32], [32 x i32]* [[X]], i64 0, i64 5
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[X0]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT:    [[T4:%.*]] = load i32, i32* [[X4]], align 4
-; CHECK-NEXT:    [[T5:%.*]] = load i32, i32* [[X5]], align 4
+; CHECK-NEXT:    [[X4:%.*]] = getelementptr [32 x i32], ptr [[X:%.*]], i64 0, i64 4
+; CHECK-NEXT:    [[X5:%.*]] = getelementptr [32 x i32], ptr [[X]], i64 0, i64 5
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X]], align 4
+; CHECK-NEXT:    [[T4:%.*]] = load i32, ptr [[X4]], align 4
+; CHECK-NEXT:    [[T5:%.*]] = load i32, ptr [[X5]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]])
 ; CHECK-NEXT:    [[MAX_ROOT_CMP:%.*]] = icmp sgt i32 [[TMP3]], [[T4]]
 ; CHECK-NEXT:    [[MAX_ROOT_SEL:%.*]] = select i1 [[MAX_ROOT_CMP]], i32 [[TMP3]], i32 [[T4]]
 ; CHECK-NEXT:    [[C012345:%.*]] = icmp sgt i32 [[MAX_ROOT_SEL]], [[T5]]
 ; CHECK-NEXT:    [[T17:%.*]] = select i1 [[C012345]], i32 [[MAX_ROOT_SEL]], i32 [[T5]]
 ; CHECK-NEXT:    [[THREE_OR_FOUR:%.*]] = select i1 [[MAX_ROOT_CMP]], i32 3, i32 4
-; CHECK-NEXT:    store i32 [[THREE_OR_FOUR]], i32* [[P:%.*]], align 8
+; CHECK-NEXT:    store i32 [[THREE_OR_FOUR]], ptr [[P:%.*]], align 8
 ; CHECK-NEXT:    ret i32 [[T17]]
 ;
-  %x0 = getelementptr [32 x i32], [32 x i32]* %x, i64 0, i64 0
-  %x1 = getelementptr [32 x i32], [32 x i32]* %x, i64 0, i64 1
-  %x2 = getelementptr [32 x i32], [32 x i32]* %x, i64 0, i64 2
-  %x3 = getelementptr [32 x i32], [32 x i32]* %x, i64 0, i64 3
-  %x4 = getelementptr [32 x i32], [32 x i32]* %x, i64 0, i64 4
-  %x5 = getelementptr [32 x i32], [32 x i32]* %x, i64 0, i64 5
+  %x1 = getelementptr [32 x i32], ptr %x, i64 0, i64 1
+  %x2 = getelementptr [32 x i32], ptr %x, i64 0, i64 2
+  %x3 = getelementptr [32 x i32], ptr %x, i64 0, i64 3
+  %x4 = getelementptr [32 x i32], ptr %x, i64 0, i64 4
+  %x5 = getelementptr [32 x i32], ptr %x, i64 0, i64 5
 
-  %t0 = load i32, i32* %x0
-  %t1 = load i32, i32* %x1
-  %t2 = load i32, i32* %x2
-  %t3 = load i32, i32* %x3
-  %t4 = load i32, i32* %x4
-  %t5 = load i32, i32* %x5
+  %t0 = load i32, ptr %x
+  %t1 = load i32, ptr %x1
+  %t2 = load i32, ptr %x2
+  %t3 = load i32, ptr %x3
+  %t4 = load i32, ptr %x4
+  %t5 = load i32, ptr %x5
 
   %c01 = icmp sgt i32 %t0, %t1
   %s5 = select i1 %c01, i32 %t0, i32 %t1
@@ -114,32 +110,30 @@ define i32 @horiz_max_multiple_uses([32 x i32]* %x, i32* %p) {
   %c012345 = icmp sgt i32 %MAX_ROOT_SEL, %t5
   %t17 = select i1 %c012345, i32 %MAX_ROOT_SEL, i32 %t5
   %three_or_four = select i1 %MAX_ROOT_CMP, i32 3, i32 4
-  store i32 %three_or_four, i32* %p, align 8
+  store i32 %three_or_four, ptr %p, align 8
   ret i32 %t17
 }
 
 ; This is a miscompile (see the undef operand) and/or test for invalid IR.
 
-define i1 @bad_insertpoint_rdx([8 x i32]* %p) #0 {
+define i1 @bad_insertpoint_rdx(ptr %p) #0 {
 ; CHECK-LABEL: @bad_insertpoint_rdx(
-; CHECK-NEXT:    [[ARRAYIDX22:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[P:%.*]], i64 0, i64 0
-; CHECK-NEXT:    [[T0:%.*]] = load i32, i32* [[ARRAYIDX22]], align 16
+; CHECK-NEXT:    [[T0:%.*]] = load i32, ptr [[P:%.*]], align 16
 ; CHECK-NEXT:    [[CMP23:%.*]] = icmp sgt i32 [[T0]], 0
 ; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[CMP23]], i32 [[T0]], i32 0
-; CHECK-NEXT:    [[ARRAYIDX22_1:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[P]], i64 0, i64 1
-; CHECK-NEXT:    [[T1:%.*]] = load i32, i32* [[ARRAYIDX22_1]], align 4
+; CHECK-NEXT:    [[ARRAYIDX22_1:%.*]] = getelementptr inbounds [8 x i32], ptr [[P]], i64 0, i64 1
+; CHECK-NEXT:    [[T1:%.*]] = load i32, ptr [[ARRAYIDX22_1]], align 4
 ; CHECK-NEXT:    [[CMP23_1:%.*]] = icmp sgt i32 [[T1]], [[SPEC_SELECT]]
 ; CHECK-NEXT:    [[SPEC_STORE_SELECT87:%.*]] = zext i1 [[CMP23_1]] to i32
 ; CHECK-NEXT:    [[SPEC_SELECT88:%.*]] = select i1 [[CMP23_1]], i32 [[T1]], i32 [[SPEC_SELECT]]
 ; CHECK-NEXT:    [[CMP23_2:%.*]] = icmp sgt i32 [[SPEC_STORE_SELECT87]], [[SPEC_SELECT88]]
 ; CHECK-NEXT:    ret i1 [[CMP23_2]]
 ;
-  %arrayidx22 = getelementptr inbounds [8 x i32], [8 x i32]* %p, i64 0, i64 0
-  %t0 = load i32, i32* %arrayidx22, align 16
+  %t0 = load i32, ptr %p, align 16
   %cmp23 = icmp sgt i32 %t0, 0
   %spec.select = select i1 %cmp23, i32 %t0, i32 0
-  %arrayidx22.1 = getelementptr inbounds [8 x i32], [8 x i32]* %p, i64 0, i64 1
-  %t1 = load i32, i32* %arrayidx22.1, align 4
+  %arrayidx22.1 = getelementptr inbounds [8 x i32], ptr %p, i64 0, i64 1
+  %t1 = load i32, ptr %arrayidx22.1, align 4
   %cmp23.1 = icmp sgt i32 %t1, %spec.select
   %spec.store.select87 = zext i1 %cmp23.1 to i32
   %spec.select88 = select i1 %cmp23.1, i32 %t1, i32 %spec.select

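A second recurring fold is visible in @horiz_max_multiple_uses above: a zero-offset GEP such as getelementptr [32 x i32], ptr %x, i64 0, i64 0 computes the same address as %x and no longer pins a pointee type, so the instruction is dropped and its users take the base pointer. A minimal sketch (hypothetical values):

    ; Typed pointers: the zero-index GEP existed only to produce an i32*.
    %x0 = getelementptr [32 x i32], [32 x i32]* %x, i64 0, i64 0
    %t0 = load i32, i32* %x0

    ; Opaque pointers: the load takes %x directly.
    %t0 = load i32, ptr %x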
diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
index 6c3e14d9f6bc2..8b2b15283601a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
@@ -11,7 +11,7 @@
 ;  for (int y = 0; y < 2; y++) {
 ;    // Inner loop gets unrolled
 ;    for (int x = 0; x < 8; x++) {
-;      sum += p[x] * 42;
+;      sum += p[x] * 42;
 ;    }
 ;    // Dummy call to keep outer loop alive
 ;    foo();
@@ -19,14 +19,13 @@
 ;  return sum;
 ;}
 
-define i32 @test(i32* nocapture readonly %p) {
+define i32 @test(ptr nocapture readonly %p) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <8 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = mul <8 x i32> [[TMP1]], <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
 ; CHECK-NEXT:    [[OP_RDX]] = add i32 [[TMP3]], [[SUM]]
@@ -35,39 +34,39 @@ define i32 @test(i32* nocapture readonly %p) {
 ; CHECK-NEXT:    ret i32 [[OP_RDX]]
 ;
 entry:
-  %arrayidx.1 = getelementptr inbounds i32, i32* %p, i64 1
-  %arrayidx.2 = getelementptr inbounds i32, i32* %p, i64 2
-  %arrayidx.3 = getelementptr inbounds i32, i32* %p, i64 3
-  %arrayidx.4 = getelementptr inbounds i32, i32* %p, i64 4
-  %arrayidx.5 = getelementptr inbounds i32, i32* %p, i64 5
-  %arrayidx.6 = getelementptr inbounds i32, i32* %p, i64 6
-  %arrayidx.7 = getelementptr inbounds i32, i32* %p, i64 7
+  %arrayidx.1 = getelementptr inbounds i32, ptr %p, i64 1
+  %arrayidx.2 = getelementptr inbounds i32, ptr %p, i64 2
+  %arrayidx.3 = getelementptr inbounds i32, ptr %p, i64 3
+  %arrayidx.4 = getelementptr inbounds i32, ptr %p, i64 4
+  %arrayidx.5 = getelementptr inbounds i32, ptr %p, i64 5
+  %arrayidx.6 = getelementptr inbounds i32, ptr %p, i64 6
+  %arrayidx.7 = getelementptr inbounds i32, ptr %p, i64 7
   br label %for.body
 
 for.body:
   %sum = phi i32 [ 0, %entry ], [ %add.7, %for.body ]
-  %tmp = load i32, i32* %p, align 4
+  %tmp = load i32, ptr %p, align 4
   %mul = mul i32 %tmp, 42
   %add = add i32 %mul, %sum
-  %tmp5 = load i32, i32* %arrayidx.1, align 4
+  %tmp5 = load i32, ptr %arrayidx.1, align 4
   %mul.1 = mul i32 %tmp5, 42
   %add.1 = add i32 %mul.1, %add
-  %tmp6 = load i32, i32* %arrayidx.2, align 4
+  %tmp6 = load i32, ptr %arrayidx.2, align 4
   %mul.2 = mul i32 %tmp6, 42
   %add.2 = add i32 %mul.2, %add.1
-  %tmp7 = load i32, i32* %arrayidx.3, align 4
+  %tmp7 = load i32, ptr %arrayidx.3, align 4
   %mul.3 = mul i32 %tmp7, 42
   %add.3 = add i32 %mul.3, %add.2
-  %tmp8 = load i32, i32* %arrayidx.4, align 4
+  %tmp8 = load i32, ptr %arrayidx.4, align 4
   %mul.4 = mul i32 %tmp8, 42
   %add.4 = add i32 %mul.4, %add.3
-  %tmp9 = load i32, i32* %arrayidx.5, align 4
+  %tmp9 = load i32, ptr %arrayidx.5, align 4
   %mul.5 = mul i32 %tmp9, 42
   %add.5 = add i32 %mul.5, %add.4
-  %tmp10 = load i32, i32* %arrayidx.6, align 4
+  %tmp10 = load i32, ptr %arrayidx.6, align 4
   %mul.6 = mul i32 %tmp10, 42
   %add.6 = add i32 %mul.6, %add.5
-  %tmp11 = load i32, i32* %arrayidx.7, align 4
+  %tmp11 = load i32, ptr %arrayidx.7, align 4
   %mul.7 = mul i32 %tmp11, 42
   %add.7 = add i32 %mul.7, %add.6
   br i1 true, label %for.end, label %for.body
@@ -84,7 +83,7 @@ for.end:
 ;  for (int y = 0; y < 2; y++) {
 ;    // Inner loop gets unrolled
 ;    for (int x = 0; x < 8; x++) {
-;      sum += p[x] * q[x];
+;      sum += p[x] * q[x];
 ;    }
 ;    // Dummy call to keep outer loop alive
 ;    foo();
@@ -92,16 +91,14 @@ for.end:
 ;  return sum;
 ;}
 
-define i32 @test2(i32* nocapture readonly %p, i32* nocapture readonly %q) {
+define i32 @test2(ptr nocapture readonly %p, ptr nocapture readonly %q) {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <8 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[Q:%.*]] to <8 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr [[P:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[Q:%.*]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul <8 x i32> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]])
 ; CHECK-NEXT:    [[OP_RDX]] = add i32 [[TMP5]], [[SUM]]
@@ -110,55 +107,55 @@ define i32 @test2(i32* nocapture readonly %p, i32* nocapture readonly %q) {
 ; CHECK-NEXT:    ret i32 [[OP_RDX]]
 ;
 entry:
-  %arrayidx.p.1 = getelementptr inbounds i32, i32* %p, i64 1
-  %arrayidx.p.2 = getelementptr inbounds i32, i32* %p, i64 2
-  %arrayidx.p.3 = getelementptr inbounds i32, i32* %p, i64 3
-  %arrayidx.p.4 = getelementptr inbounds i32, i32* %p, i64 4
-  %arrayidx.p.5 = getelementptr inbounds i32, i32* %p, i64 5
-  %arrayidx.p.6 = getelementptr inbounds i32, i32* %p, i64 6
-  %arrayidx.p.7 = getelementptr inbounds i32, i32* %p, i64 7
+  %arrayidx.p.1 = getelementptr inbounds i32, ptr %p, i64 1
+  %arrayidx.p.2 = getelementptr inbounds i32, ptr %p, i64 2
+  %arrayidx.p.3 = getelementptr inbounds i32, ptr %p, i64 3
+  %arrayidx.p.4 = getelementptr inbounds i32, ptr %p, i64 4
+  %arrayidx.p.5 = getelementptr inbounds i32, ptr %p, i64 5
+  %arrayidx.p.6 = getelementptr inbounds i32, ptr %p, i64 6
+  %arrayidx.p.7 = getelementptr inbounds i32, ptr %p, i64 7
 
-  %arrayidx.q.1 = getelementptr inbounds i32, i32* %q, i64 1
-  %arrayidx.q.2 = getelementptr inbounds i32, i32* %q, i64 2
-  %arrayidx.q.3 = getelementptr inbounds i32, i32* %q, i64 3
-  %arrayidx.q.4 = getelementptr inbounds i32, i32* %q, i64 4
-  %arrayidx.q.5 = getelementptr inbounds i32, i32* %q, i64 5
-  %arrayidx.q.6 = getelementptr inbounds i32, i32* %q, i64 6
-  %arrayidx.q.7 = getelementptr inbounds i32, i32* %q, i64 7
+  %arrayidx.q.1 = getelementptr inbounds i32, ptr %q, i64 1
+  %arrayidx.q.2 = getelementptr inbounds i32, ptr %q, i64 2
+  %arrayidx.q.3 = getelementptr inbounds i32, ptr %q, i64 3
+  %arrayidx.q.4 = getelementptr inbounds i32, ptr %q, i64 4
+  %arrayidx.q.5 = getelementptr inbounds i32, ptr %q, i64 5
+  %arrayidx.q.6 = getelementptr inbounds i32, ptr %q, i64 6
+  %arrayidx.q.7 = getelementptr inbounds i32, ptr %q, i64 7
   br label %for.body
 
 for.body:
   %sum = phi i32 [ 0, %entry ], [ %add.7, %for.body ]
-  %tmpp = load i32, i32* %p, align 4
-  %tmpq = load i32, i32* %q, align 4
+  %tmpp = load i32, ptr %p, align 4
+  %tmpq = load i32, ptr %q, align 4
   %mul = mul i32 %tmpp, %tmpq
   %add = add i32 %mul, %sum
-  %tmp5p = load i32, i32* %arrayidx.p.1, align 4
-  %tmp5q = load i32, i32* %arrayidx.q.1, align 4
+  %tmp5p = load i32, ptr %arrayidx.p.1, align 4
+  %tmp5q = load i32, ptr %arrayidx.q.1, align 4
   %mul.1 = mul i32 %tmp5p, %tmp5q
   %add.1 = add i32 %mul.1, %add
-  %tmp6p = load i32, i32* %arrayidx.p.2, align 4
-  %tmp6q = load i32, i32* %arrayidx.q.2, align 4
+  %tmp6p = load i32, ptr %arrayidx.p.2, align 4
+  %tmp6q = load i32, ptr %arrayidx.q.2, align 4
   %mul.2 = mul i32 %tmp6p, %tmp6q
   %add.2 = add i32 %mul.2, %add.1
-  %tmp7p = load i32, i32* %arrayidx.p.3, align 4
-  %tmp7q = load i32, i32* %arrayidx.q.3, align 4
+  %tmp7p = load i32, ptr %arrayidx.p.3, align 4
+  %tmp7q = load i32, ptr %arrayidx.q.3, align 4
   %mul.3 = mul i32 %tmp7p, %tmp7q
   %add.3 = add i32 %mul.3, %add.2
-  %tmp8p = load i32, i32* %arrayidx.p.4, align 4
-  %tmp8q = load i32, i32* %arrayidx.q.4, align 4
+  %tmp8p = load i32, ptr %arrayidx.p.4, align 4
+  %tmp8q = load i32, ptr %arrayidx.q.4, align 4
   %mul.4 = mul i32 %tmp8p, %tmp8q
   %add.4 = add i32 %mul.4, %add.3
-  %tmp9p = load i32, i32* %arrayidx.p.5, align 4
-  %tmp9q = load i32, i32* %arrayidx.q.5, align 4
+  %tmp9p = load i32, ptr %arrayidx.p.5, align 4
+  %tmp9q = load i32, ptr %arrayidx.q.5, align 4
   %mul.5 = mul i32 %tmp9p, %tmp9q
   %add.5 = add i32 %mul.5, %add.4
-  %tmp10p = load i32, i32* %arrayidx.p.6, align 4
-  %tmp10q = load i32, i32* %arrayidx.q.6, align 4
+  %tmp10p = load i32, ptr %arrayidx.p.6, align 4
+  %tmp10q = load i32, ptr %arrayidx.q.6, align 4
   %mul.6 = mul i32 %tmp10p, %tmp10q
   %add.6 = add i32 %mul.6, %add.5
-  %tmp11p = load i32, i32* %arrayidx.p.7, align 4
-  %tmp11q = load i32, i32* %arrayidx.q.7, align 4
+  %tmp11p = load i32, ptr %arrayidx.p.7, align 4
+  %tmp11q = load i32, ptr %arrayidx.q.7, align 4
   %mul.7 = mul i32 %tmp11p, %tmp11q
   %add.7 = add i32 %mul.7, %add.6
   br i1 true, label %for.end, label %for.body
@@ -175,7 +172,7 @@ for.end:
 ;  for (int y = 0; y < 2; y++) {
 ;    // Inner loop gets unrolled
 ;    for (int x = 0; x < 8; x++) {
-;      sum += p[x] * q[7-x];
+;      sum += p[x] * q[7-x];
 ;    }
 ;    // Dummy call to keep outer loop alive
 ;    foo();
@@ -183,16 +180,14 @@ for.end:
 ;  return sum;
 ;}
 
-define i32 @test3(i32* nocapture readonly %p, i32* nocapture readonly %q) {
+define i32 @test3(ptr nocapture readonly %p, ptr nocapture readonly %q) {
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <8 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[Q:%.*]] to <8 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr [[P:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[Q:%.*]], align 4
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul <8 x i32> [[TMP1]], [[SHUFFLE]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]])
@@ -202,55 +197,55 @@ define i32 @test3(i32* nocapture readonly %p, i32* nocapture readonly %q) {
 ; CHECK-NEXT:    ret i32 [[OP_RDX]]
 ;
 entry:
-  %arrayidx.p.1 = getelementptr inbounds i32, i32* %p, i64 1
-  %arrayidx.p.2 = getelementptr inbounds i32, i32* %p, i64 2
-  %arrayidx.p.3 = getelementptr inbounds i32, i32* %p, i64 3
-  %arrayidx.p.4 = getelementptr inbounds i32, i32* %p, i64 4
-  %arrayidx.p.5 = getelementptr inbounds i32, i32* %p, i64 5
-  %arrayidx.p.6 = getelementptr inbounds i32, i32* %p, i64 6
-  %arrayidx.p.7 = getelementptr inbounds i32, i32* %p, i64 7
+  %arrayidx.p.1 = getelementptr inbounds i32, ptr %p, i64 1
+  %arrayidx.p.2 = getelementptr inbounds i32, ptr %p, i64 2
+  %arrayidx.p.3 = getelementptr inbounds i32, ptr %p, i64 3
+  %arrayidx.p.4 = getelementptr inbounds i32, ptr %p, i64 4
+  %arrayidx.p.5 = getelementptr inbounds i32, ptr %p, i64 5
+  %arrayidx.p.6 = getelementptr inbounds i32, ptr %p, i64 6
+  %arrayidx.p.7 = getelementptr inbounds i32, ptr %p, i64 7
 
-  %arrayidx.q.1 = getelementptr inbounds i32, i32* %q, i64 1
-  %arrayidx.q.2 = getelementptr inbounds i32, i32* %q, i64 2
-  %arrayidx.q.3 = getelementptr inbounds i32, i32* %q, i64 3
-  %arrayidx.q.4 = getelementptr inbounds i32, i32* %q, i64 4
-  %arrayidx.q.5 = getelementptr inbounds i32, i32* %q, i64 5
-  %arrayidx.q.6 = getelementptr inbounds i32, i32* %q, i64 6
-  %arrayidx.q.7 = getelementptr inbounds i32, i32* %q, i64 7
+  %arrayidx.q.1 = getelementptr inbounds i32, ptr %q, i64 1
+  %arrayidx.q.2 = getelementptr inbounds i32, ptr %q, i64 2
+  %arrayidx.q.3 = getelementptr inbounds i32, ptr %q, i64 3
+  %arrayidx.q.4 = getelementptr inbounds i32, ptr %q, i64 4
+  %arrayidx.q.5 = getelementptr inbounds i32, ptr %q, i64 5
+  %arrayidx.q.6 = getelementptr inbounds i32, ptr %q, i64 6
+  %arrayidx.q.7 = getelementptr inbounds i32, ptr %q, i64 7
   br label %for.body
 
 for.body:
   %sum = phi i32 [ 0, %entry ], [ %add.7, %for.body ]
-  %tmpp = load i32, i32* %p, align 4
-  %tmpq = load i32, i32* %arrayidx.q.7, align 4
+  %tmpp = load i32, ptr %p, align 4
+  %tmpq = load i32, ptr %arrayidx.q.7, align 4
   %mul = mul i32 %tmpp, %tmpq
   %add = add i32 %mul, %sum
-  %tmp5p = load i32, i32* %arrayidx.p.1, align 4
-  %tmp5q = load i32, i32* %arrayidx.q.6, align 4
+  %tmp5p = load i32, ptr %arrayidx.p.1, align 4
+  %tmp5q = load i32, ptr %arrayidx.q.6, align 4
   %mul.1 = mul i32 %tmp5p, %tmp5q
   %add.1 = add i32 %mul.1, %add
-  %tmp6p = load i32, i32* %arrayidx.p.2, align 4
-  %tmp6q = load i32, i32* %arrayidx.q.5, align 4
+  %tmp6p = load i32, ptr %arrayidx.p.2, align 4
+  %tmp6q = load i32, ptr %arrayidx.q.5, align 4
   %mul.2 = mul i32 %tmp6p, %tmp6q
   %add.2 = add i32 %mul.2, %add.1
-  %tmp7p = load i32, i32* %arrayidx.p.3, align 4
-  %tmp7q = load i32, i32* %arrayidx.q.4, align 4
+  %tmp7p = load i32, ptr %arrayidx.p.3, align 4
+  %tmp7q = load i32, ptr %arrayidx.q.4, align 4
   %mul.3 = mul i32 %tmp7p, %tmp7q
   %add.3 = add i32 %mul.3, %add.2
-  %tmp8p = load i32, i32* %arrayidx.p.4, align 4
-  %tmp8q = load i32, i32* %arrayidx.q.3, align 4
+  %tmp8p = load i32, ptr %arrayidx.p.4, align 4
+  %tmp8q = load i32, ptr %arrayidx.q.3, align 4
   %mul.4 = mul i32 %tmp8p, %tmp8q
   %add.4 = add i32 %mul.4, %add.3
-  %tmp9p = load i32, i32* %arrayidx.p.5, align 4
-  %tmp9q = load i32, i32* %arrayidx.q.2, align 4
+  %tmp9p = load i32, ptr %arrayidx.p.5, align 4
+  %tmp9q = load i32, ptr %arrayidx.q.2, align 4
   %mul.5 = mul i32 %tmp9p, %tmp9q
   %add.5 = add i32 %mul.5, %add.4
-  %tmp10p = load i32, i32* %arrayidx.p.6, align 4
-  %tmp10q = load i32, i32* %arrayidx.q.1, align 4
+  %tmp10p = load i32, ptr %arrayidx.p.6, align 4
+  %tmp10q = load i32, ptr %arrayidx.q.1, align 4
   %mul.6 = mul i32 %tmp10p, %tmp10q
   %add.6 = add i32 %mul.6, %add.5
-  %tmp11p = load i32, i32* %arrayidx.p.7, align 4
-  %tmp11q = load i32, i32* %q, align 4
+  %tmp11p = load i32, ptr %arrayidx.p.7, align 4
+  %tmp11q = load i32, ptr %q, align 4
   %mul.7 = mul i32 %tmp11p, %tmp11q
   %add.7 = add i32 %mul.7, %add.6
   br i1 true, label %for.end, label %for.body

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
index 3d32e28723899..d20161029303d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
@@ -11,39 +11,38 @@
 ; }
 
 ; Vector cost is 5, Scalar cost is 7
-; AVX: Adding cost -2 for reduction that starts with   %0 = load i32, i32* %p, align 4 (It is a splitting reduction)
+; AVX: Adding cost -2 for reduction that starts with   %0 = load i32, ptr %p, align 4 (It is a splitting reduction)
 ; Vector cost is 6, Scalar cost is 7
-; SSE: Adding cost -1 for reduction that starts with   %0 = load i32, i32* %p, align 4 (It is a splitting reduction)
-define i32 @test_add(i32* nocapture readonly %p) {
+; SSE: Adding cost -1 for reduction that starts with   %0 = load i32, ptr %p, align 4 (It is a splitting reduction)
+define i32 @test_add(ptr nocapture readonly %p) {
 ; CHECK-LABEL: @test_add(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <8 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
 entry:
-  %0 = load i32, i32* %p, align 4
-  %arrayidx.1 = getelementptr inbounds i32, i32* %p, i64 1
-  %1 = load i32, i32* %arrayidx.1, align 4
+  %0 = load i32, ptr %p, align 4
+  %arrayidx.1 = getelementptr inbounds i32, ptr %p, i64 1
+  %1 = load i32, ptr %arrayidx.1, align 4
   %mul.18 = add i32 %1, %0
-  %arrayidx.2 = getelementptr inbounds i32, i32* %p, i64 2
-  %2 = load i32, i32* %arrayidx.2, align 4
+  %arrayidx.2 = getelementptr inbounds i32, ptr %p, i64 2
+  %2 = load i32, ptr %arrayidx.2, align 4
   %mul.29 = add i32 %2, %mul.18
-  %arrayidx.3 = getelementptr inbounds i32, i32* %p, i64 3
-  %3 = load i32, i32* %arrayidx.3, align 4
+  %arrayidx.3 = getelementptr inbounds i32, ptr %p, i64 3
+  %3 = load i32, ptr %arrayidx.3, align 4
   %mul.310 = add i32 %3, %mul.29
-  %arrayidx.4 = getelementptr inbounds i32, i32* %p, i64 4
-  %4 = load i32, i32* %arrayidx.4, align 4
+  %arrayidx.4 = getelementptr inbounds i32, ptr %p, i64 4
+  %4 = load i32, ptr %arrayidx.4, align 4
   %mul.411 = add i32 %4, %mul.310
-  %arrayidx.5 = getelementptr inbounds i32, i32* %p, i64 5
-  %5 = load i32, i32* %arrayidx.5, align 4
+  %arrayidx.5 = getelementptr inbounds i32, ptr %p, i64 5
+  %5 = load i32, ptr %arrayidx.5, align 4
   %mul.512 = add i32 %5, %mul.411
-  %arrayidx.6 = getelementptr inbounds i32, i32* %p, i64 6
-  %6 = load i32, i32* %arrayidx.6, align 4
+  %arrayidx.6 = getelementptr inbounds i32, ptr %p, i64 6
+  %6 = load i32, ptr %arrayidx.6, align 4
   %mul.613 = add i32 %6, %mul.512
-  %arrayidx.7 = getelementptr inbounds i32, i32* %p, i64 7
-  %7 = load i32, i32* %arrayidx.7, align 4
+  %arrayidx.7 = getelementptr inbounds i32, ptr %p, i64 7
+  %7 = load i32, ptr %arrayidx.7, align 4
   %mul.714 = add i32 %7, %mul.613
   ret i32 %mul.714
 }
@@ -55,62 +54,61 @@ entry:
 ;   return result;
 ; }
 
-define i32 @test_mul(i32* nocapture readonly %p) {
+define i32 @test_mul(ptr nocapture readonly %p) {
 ; AVX-LABEL: @test_mul(
 ; AVX-NEXT:  entry:
-; AVX-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <8 x i32>*
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr [[P:%.*]], align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP1]])
 ; AVX-NEXT:    ret i32 [[TMP2]]
 ;
 ; SSE-LABEL: @test_mul(
 ; SSE-NEXT:  entry:
-; SSE-NEXT:    [[TMP0:%.*]] = load i32, i32* [[P:%.*]], align 4
-; SSE-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
-; SSE-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
+; SSE-NEXT:    [[TMP0:%.*]] = load i32, ptr [[P:%.*]], align 4
+; SSE-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 1
+; SSE-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
 ; SSE-NEXT:    [[MUL_18:%.*]] = mul i32 [[TMP1]], [[TMP0]]
-; SSE-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2
-; SSE-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
+; SSE-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 2
+; SSE-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
 ; SSE-NEXT:    [[MUL_29:%.*]] = mul i32 [[TMP2]], [[MUL_18]]
-; SSE-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3
-; SSE-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
+; SSE-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 3
+; SSE-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
 ; SSE-NEXT:    [[MUL_310:%.*]] = mul i32 [[TMP3]], [[MUL_29]]
-; SSE-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4
-; SSE-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4
+; SSE-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 4
+; SSE-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
 ; SSE-NEXT:    [[MUL_411:%.*]] = mul i32 [[TMP4]], [[MUL_310]]
-; SSE-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5
-; SSE-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4
+; SSE-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 5
+; SSE-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
 ; SSE-NEXT:    [[MUL_512:%.*]] = mul i32 [[TMP5]], [[MUL_411]]
-; SSE-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6
-; SSE-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4
+; SSE-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 6
+; SSE-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
 ; SSE-NEXT:    [[MUL_613:%.*]] = mul i32 [[TMP6]], [[MUL_512]]
-; SSE-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
-; SSE-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4
+; SSE-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 7
+; SSE-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
 ; SSE-NEXT:    [[MUL_714:%.*]] = mul i32 [[TMP7]], [[MUL_613]]
 ; SSE-NEXT:    ret i32 [[MUL_714]]
 ;
 entry:
-  %0 = load i32, i32* %p, align 4
-  %arrayidx.1 = getelementptr inbounds i32, i32* %p, i64 1
-  %1 = load i32, i32* %arrayidx.1, align 4
+  %0 = load i32, ptr %p, align 4
+  %arrayidx.1 = getelementptr inbounds i32, ptr %p, i64 1
+  %1 = load i32, ptr %arrayidx.1, align 4
   %mul.18 = mul i32 %1, %0
-  %arrayidx.2 = getelementptr inbounds i32, i32* %p, i64 2
-  %2 = load i32, i32* %arrayidx.2, align 4
+  %arrayidx.2 = getelementptr inbounds i32, ptr %p, i64 2
+  %2 = load i32, ptr %arrayidx.2, align 4
   %mul.29 = mul i32 %2, %mul.18
-  %arrayidx.3 = getelementptr inbounds i32, i32* %p, i64 3
-  %3 = load i32, i32* %arrayidx.3, align 4
+  %arrayidx.3 = getelementptr inbounds i32, ptr %p, i64 3
+  %3 = load i32, ptr %arrayidx.3, align 4
   %mul.310 = mul i32 %3, %mul.29
-  %arrayidx.4 = getelementptr inbounds i32, i32* %p, i64 4
-  %4 = load i32, i32* %arrayidx.4, align 4
+  %arrayidx.4 = getelementptr inbounds i32, ptr %p, i64 4
+  %4 = load i32, ptr %arrayidx.4, align 4
   %mul.411 = mul i32 %4, %mul.310
-  %arrayidx.5 = getelementptr inbounds i32, i32* %p, i64 5
-  %5 = load i32, i32* %arrayidx.5, align 4
+  %arrayidx.5 = getelementptr inbounds i32, ptr %p, i64 5
+  %5 = load i32, ptr %arrayidx.5, align 4
   %mul.512 = mul i32 %5, %mul.411
-  %arrayidx.6 = getelementptr inbounds i32, i32* %p, i64 6
-  %6 = load i32, i32* %arrayidx.6, align 4
+  %arrayidx.6 = getelementptr inbounds i32, ptr %p, i64 6
+  %6 = load i32, ptr %arrayidx.6, align 4
   %mul.613 = mul i32 %6, %mul.512
-  %arrayidx.7 = getelementptr inbounds i32, i32* %p, i64 7
-  %7 = load i32, i32* %arrayidx.7, align 4
+  %arrayidx.7 = getelementptr inbounds i32, ptr %p, i64 7
+  %7 = load i32, ptr %arrayidx.7, align 4
   %mul.714 = mul i32 %7, %mul.613
   ret i32 %mul.714
 }
@@ -122,36 +120,35 @@ entry:
 ;   return result;
 ; }
 
-define i32 @test_and(i32* nocapture readonly %p) {
+define i32 @test_and(ptr nocapture readonly %p) {
 ; CHECK-LABEL: @test_and(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <8 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
 entry:
-  %0 = load i32, i32* %p, align 4
-  %arrayidx.1 = getelementptr inbounds i32, i32* %p, i64 1
-  %1 = load i32, i32* %arrayidx.1, align 4
+  %0 = load i32, ptr %p, align 4
+  %arrayidx.1 = getelementptr inbounds i32, ptr %p, i64 1
+  %1 = load i32, ptr %arrayidx.1, align 4
   %mul.18 = and i32 %1, %0
-  %arrayidx.2 = getelementptr inbounds i32, i32* %p, i64 2
-  %2 = load i32, i32* %arrayidx.2, align 4
+  %arrayidx.2 = getelementptr inbounds i32, ptr %p, i64 2
+  %2 = load i32, ptr %arrayidx.2, align 4
   %mul.29 = and i32 %2, %mul.18
-  %arrayidx.3 = getelementptr inbounds i32, i32* %p, i64 3
-  %3 = load i32, i32* %arrayidx.3, align 4
+  %arrayidx.3 = getelementptr inbounds i32, ptr %p, i64 3
+  %3 = load i32, ptr %arrayidx.3, align 4
   %mul.310 = and i32 %3, %mul.29
-  %arrayidx.4 = getelementptr inbounds i32, i32* %p, i64 4
-  %4 = load i32, i32* %arrayidx.4, align 4
+  %arrayidx.4 = getelementptr inbounds i32, ptr %p, i64 4
+  %4 = load i32, ptr %arrayidx.4, align 4
   %mul.411 = and i32 %4, %mul.310
-  %arrayidx.5 = getelementptr inbounds i32, i32* %p, i64 5
-  %5 = load i32, i32* %arrayidx.5, align 4
+  %arrayidx.5 = getelementptr inbounds i32, ptr %p, i64 5
+  %5 = load i32, ptr %arrayidx.5, align 4
   %mul.512 = and i32 %5, %mul.411
-  %arrayidx.6 = getelementptr inbounds i32, i32* %p, i64 6
-  %6 = load i32, i32* %arrayidx.6, align 4
+  %arrayidx.6 = getelementptr inbounds i32, ptr %p, i64 6
+  %6 = load i32, ptr %arrayidx.6, align 4
   %mul.613 = and i32 %6, %mul.512
-  %arrayidx.7 = getelementptr inbounds i32, i32* %p, i64 7
-  %7 = load i32, i32* %arrayidx.7, align 4
+  %arrayidx.7 = getelementptr inbounds i32, ptr %p, i64 7
+  %7 = load i32, ptr %arrayidx.7, align 4
   %mul.714 = and i32 %7, %mul.613
   ret i32 %mul.714
 }
@@ -163,36 +160,35 @@ entry:
 ;   return result;
 ; }
 
-define i32 @test_or(i32* nocapture readonly %p) {
+define i32 @test_or(ptr nocapture readonly %p) {
 ; CHECK-LABEL: @test_or(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <8 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
 entry:
-  %0 = load i32, i32* %p, align 4
-  %arrayidx.1 = getelementptr inbounds i32, i32* %p, i64 1
-  %1 = load i32, i32* %arrayidx.1, align 4
+  %0 = load i32, ptr %p, align 4
+  %arrayidx.1 = getelementptr inbounds i32, ptr %p, i64 1
+  %1 = load i32, ptr %arrayidx.1, align 4
   %mul.18 = or i32 %1, %0
-  %arrayidx.2 = getelementptr inbounds i32, i32* %p, i64 2
-  %2 = load i32, i32* %arrayidx.2, align 4
+  %arrayidx.2 = getelementptr inbounds i32, ptr %p, i64 2
+  %2 = load i32, ptr %arrayidx.2, align 4
   %mul.29 = or i32 %2, %mul.18
-  %arrayidx.3 = getelementptr inbounds i32, i32* %p, i64 3
-  %3 = load i32, i32* %arrayidx.3, align 4
+  %arrayidx.3 = getelementptr inbounds i32, ptr %p, i64 3
+  %3 = load i32, ptr %arrayidx.3, align 4
   %mul.310 = or i32 %3, %mul.29
-  %arrayidx.4 = getelementptr inbounds i32, i32* %p, i64 4
-  %4 = load i32, i32* %arrayidx.4, align 4
+  %arrayidx.4 = getelementptr inbounds i32, ptr %p, i64 4
+  %4 = load i32, ptr %arrayidx.4, align 4
   %mul.411 = or i32 %4, %mul.310
-  %arrayidx.5 = getelementptr inbounds i32, i32* %p, i64 5
-  %5 = load i32, i32* %arrayidx.5, align 4
+  %arrayidx.5 = getelementptr inbounds i32, ptr %p, i64 5
+  %5 = load i32, ptr %arrayidx.5, align 4
   %mul.512 = or i32 %5, %mul.411
-  %arrayidx.6 = getelementptr inbounds i32, i32* %p, i64 6
-  %6 = load i32, i32* %arrayidx.6, align 4
+  %arrayidx.6 = getelementptr inbounds i32, ptr %p, i64 6
+  %6 = load i32, ptr %arrayidx.6, align 4
   %mul.613 = or i32 %6, %mul.512
-  %arrayidx.7 = getelementptr inbounds i32, i32* %p, i64 7
-  %7 = load i32, i32* %arrayidx.7, align 4
+  %arrayidx.7 = getelementptr inbounds i32, ptr %p, i64 7
+  %7 = load i32, ptr %arrayidx.7, align 4
   %mul.714 = or i32 %7, %mul.613
   ret i32 %mul.714
 }
@@ -204,63 +200,62 @@ entry:
 ;   return result;
 ; }
 
-define i32 @test_xor(i32* nocapture readonly %p) {
+define i32 @test_xor(ptr nocapture readonly %p) {
 ; CHECK-LABEL: @test_xor(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <8 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
 entry:
-  %0 = load i32, i32* %p, align 4
-  %arrayidx.1 = getelementptr inbounds i32, i32* %p, i64 1
-  %1 = load i32, i32* %arrayidx.1, align 4
+  %0 = load i32, ptr %p, align 4
+  %arrayidx.1 = getelementptr inbounds i32, ptr %p, i64 1
+  %1 = load i32, ptr %arrayidx.1, align 4
   %mul.18 = xor i32 %1, %0
-  %arrayidx.2 = getelementptr inbounds i32, i32* %p, i64 2
-  %2 = load i32, i32* %arrayidx.2, align 4
+  %arrayidx.2 = getelementptr inbounds i32, ptr %p, i64 2
+  %2 = load i32, ptr %arrayidx.2, align 4
   %mul.29 = xor i32 %2, %mul.18
-  %arrayidx.3 = getelementptr inbounds i32, i32* %p, i64 3
-  %3 = load i32, i32* %arrayidx.3, align 4
+  %arrayidx.3 = getelementptr inbounds i32, ptr %p, i64 3
+  %3 = load i32, ptr %arrayidx.3, align 4
   %mul.310 = xor i32 %3, %mul.29
-  %arrayidx.4 = getelementptr inbounds i32, i32* %p, i64 4
-  %4 = load i32, i32* %arrayidx.4, align 4
+  %arrayidx.4 = getelementptr inbounds i32, ptr %p, i64 4
+  %4 = load i32, ptr %arrayidx.4, align 4
   %mul.411 = xor i32 %4, %mul.310
-  %arrayidx.5 = getelementptr inbounds i32, i32* %p, i64 5
-  %5 = load i32, i32* %arrayidx.5, align 4
+  %arrayidx.5 = getelementptr inbounds i32, ptr %p, i64 5
+  %5 = load i32, ptr %arrayidx.5, align 4
   %mul.512 = xor i32 %5, %mul.411
-  %arrayidx.6 = getelementptr inbounds i32, i32* %p, i64 6
-  %6 = load i32, i32* %arrayidx.6, align 4
+  %arrayidx.6 = getelementptr inbounds i32, ptr %p, i64 6
+  %6 = load i32, ptr %arrayidx.6, align 4
   %mul.613 = xor i32 %6, %mul.512
-  %arrayidx.7 = getelementptr inbounds i32, i32* %p, i64 7
-  %7 = load i32, i32* %arrayidx.7, align 4
+  %arrayidx.7 = getelementptr inbounds i32, ptr %p, i64 7
+  %7 = load i32, ptr %arrayidx.7, align 4
   %mul.714 = xor i32 %7, %mul.613
   ret i32 %mul.714
 }
 
-define i32 @PR37731(<4 x i32>* noalias nocapture dereferenceable(16) %self) unnamed_addr #0 {
+define i32 @PR37731(ptr noalias nocapture dereferenceable(16) %self) unnamed_addr #0 {
 ; CHECK-LABEL: @PR37731(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[SELF:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[SELF:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[TMP0]], <i32 6, i32 2, i32 13, i32 3>
 ; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], [[TMP0]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i32> [[TMP2]], <i32 13, i32 27, i32 21, i32 12>
 ; CHECK-NEXT:    [[TMP4:%.*]] = and <4 x i32> [[TMP0]], <i32 -2, i32 -8, i32 -16, i32 -128>
 ; CHECK-NEXT:    [[TMP5:%.*]] = shl <4 x i32> [[TMP4]], <i32 18, i32 2, i32 7, i32 13>
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor <4 x i32> [[TMP3]], [[TMP5]]
-; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[SELF]], align 16
+; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr [[SELF]], align 16
 ; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP6]])
 ; CHECK-NEXT:    ret i32 [[TMP7]]
 ;
 entry:
-  %0 = load <4 x i32>, <4 x i32>* %self, align 16
+  %0 = load <4 x i32>, ptr %self, align 16
   %1 = shl <4 x i32> %0, <i32 6, i32 2, i32 13, i32 3>
   %2 = xor <4 x i32> %1, %0
   %3 = lshr <4 x i32> %2, <i32 13, i32 27, i32 21, i32 12>
   %4 = and <4 x i32> %0, <i32 -2, i32 -8, i32 -16, i32 -128>
   %5 = shl <4 x i32> %4, <i32 18, i32 2, i32 7, i32 13>
   %6 = xor <4 x i32> %3, %5
-  store <4 x i32> %6, <4 x i32>* %self, align 16
+  store <4 x i32> %6, ptr %self, align 16
   %7 = extractelement <4 x i32> %6, i32 0
   %8 = extractelement <4 x i32> %6, i32 1
   %9 = xor i32 %7, %8

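The next file additionally exercises overloaded memory intrinsics, where the conversion also renames the mangled type suffix: with opaque pointers a pointer-vector parameter mangles by address space only (v8p0) rather than by pointee type (v8p0f64). A hedged sketch using the gather counterpart (the scatter declaration appears verbatim in the hunk below):

    ; Typed pointers: the pointee type is mangled into both the
    ; intrinsic name and the pointer-vector operand.
    declare <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*>, i32 immarg, <8 x i1>, <8 x double>)

    ; Opaque pointers: only the address space (p0) remains.
    declare <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr>, i32 immarg, <8 x i1>, <8 x double>)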
diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-buildvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-buildvector.ll
index c9ca400cf7498..83457cc4966f7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-buildvector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-buildvector.ll
@@ -5,111 +5,109 @@
 ; but the most effective way to vectorize it is to match both 8-way reductions
 ; feeding the insertelement vector build sequence.
 
-declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double>, <2 x double*>, i32 immarg, <2 x i1>)
+declare void @llvm.masked.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, i32 immarg, <2 x i1>)
 
-define void @test(double* nocapture readonly %arg, double* nocapture readonly %arg1, double* nocapture %arg2) {
+define void @test(ptr nocapture readonly %arg, ptr nocapture readonly %arg1, ptr nocapture %arg2) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <8 x double*> poison, double* [[ARG:%.*]], i32 0
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x double*> [[TMP0]], <8 x double*> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, <8 x double*> [[SHUFFLE]], <8 x i64> <i64 1, i64 3, i64 5, i64 7, i64 9, i64 11, i64 13, i64 15>
-; CHECK-NEXT:    [[GEP2_0:%.*]] = getelementptr inbounds double, double* [[ARG1:%.*]], i64 16
-; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> [[TMP1]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x double> poison)
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[GEP2_0]] to <8 x double>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x double>, <8 x double>* [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[ARG:%.*]], i32 0
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, <8 x ptr> [[SHUFFLE]], <8 x i64> <i64 1, i64 3, i64 5, i64 7, i64 9, i64 11, i64 13, i64 15>
+; CHECK-NEXT:    [[GEP2_0:%.*]] = getelementptr inbounds double, ptr [[ARG1:%.*]], i64 16
+; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> [[TMP1]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x double> poison)
+; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x double>, ptr [[GEP2_0]], align 8
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast <8 x double> [[TMP4]], [[TMP2]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[ARG1]] to <8 x double>*
-; CHECK-NEXT:    [[TMP7:%.*]] = load <8 x double>, <8 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = load <8 x double>, ptr [[ARG1]], align 8
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul fast <8 x double> [[TMP7]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = call fast double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> [[TMP8]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = call fast double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> [[TMP5]])
 ; CHECK-NEXT:    [[I142:%.*]] = insertelement <2 x double> poison, double [[TMP9]], i64 0
 ; CHECK-NEXT:    [[I143:%.*]] = insertelement <2 x double> [[I142]], double [[TMP10]], i64 1
-; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds double, double* [[ARG2:%.*]], <2 x i64> <i64 0, i64 16>
-; CHECK-NEXT:    call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> [[I143]], <2 x double*> [[P]], i32 8, <2 x i1> <i1 true, i1 true>)
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds double, ptr [[ARG2:%.*]], <2 x i64> <i64 0, i64 16>
+; CHECK-NEXT:    call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> [[I143]], <2 x ptr> [[P]], i32 8, <2 x i1> <i1 true, i1 true>)
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %gep1.0 = getelementptr inbounds double, double* %arg, i64 1
-  %ld1.0 = load double, double* %gep1.0, align 8
-  %ld0.0 = load double, double* %arg1, align 8
+  %gep1.0 = getelementptr inbounds double, ptr %arg, i64 1
+  %ld1.0 = load double, ptr %gep1.0, align 8
+  %ld0.0 = load double, ptr %arg1, align 8
   %mul1.0 = fmul fast double %ld0.0, %ld1.0
-  %gep2.0 = getelementptr inbounds double, double* %arg1, i64 16
-  %ld2.0 = load double, double* %gep2.0, align 8
+  %gep2.0 = getelementptr inbounds double, ptr %arg1, i64 16
+  %ld2.0 = load double, ptr %gep2.0, align 8
   %mul2.0 = fmul fast double %ld2.0, %ld1.0
-  %gep1.1 = getelementptr inbounds double, double* %arg, i64 3
-  %ld1.1 = load double, double* %gep1.1, align 8
-  %gep0.1 = getelementptr inbounds double, double* %arg1, i64 1
-  %ld0.1 = load double, double* %gep0.1, align 8
+  %gep1.1 = getelementptr inbounds double, ptr %arg, i64 3
+  %ld1.1 = load double, ptr %gep1.1, align 8
+  %gep0.1 = getelementptr inbounds double, ptr %arg1, i64 1
+  %ld0.1 = load double, ptr %gep0.1, align 8
   %mul1.1 = fmul fast double %ld0.1, %ld1.1
   %rdx1.0 = fadd fast double %mul1.0, %mul1.1
-  %gep2.1 = getelementptr inbounds double, double* %arg1, i64 17
-  %ld2.1 = load double, double* %gep2.1, align 8
+  %gep2.1 = getelementptr inbounds double, ptr %arg1, i64 17
+  %ld2.1 = load double, ptr %gep2.1, align 8
   %mul2.1 = fmul fast double %ld2.1, %ld1.1
   %rdx2.0 = fadd fast double %mul2.0, %mul2.1
-  %gep1.2 = getelementptr inbounds double, double* %arg, i64 5
-  %ld1.2 = load double, double* %gep1.2, align 8
-  %gep0.2 = getelementptr inbounds double, double* %arg1, i64 2
-  %ld0.2 = load double, double* %gep0.2, align 8
+  %gep1.2 = getelementptr inbounds double, ptr %arg, i64 5
+  %ld1.2 = load double, ptr %gep1.2, align 8
+  %gep0.2 = getelementptr inbounds double, ptr %arg1, i64 2
+  %ld0.2 = load double, ptr %gep0.2, align 8
   %mul1.2 = fmul fast double %ld0.2, %ld1.2
   %rdx1.1 = fadd fast double %rdx1.0, %mul1.2
-  %gep2.2 = getelementptr inbounds double, double* %arg1, i64 18
-  %ld2.2 = load double, double* %gep2.2, align 8
+  %gep2.2 = getelementptr inbounds double, ptr %arg1, i64 18
+  %ld2.2 = load double, ptr %gep2.2, align 8
   %mul2.2 = fmul fast double %ld2.2, %ld1.2
   %rdx2.1 = fadd fast double %rdx2.0, %mul2.2
-  %gep1.3 = getelementptr inbounds double, double* %arg, i64 7
-  %ld1.3 = load double, double* %gep1.3, align 8
-  %gep0.3 = getelementptr inbounds double, double* %arg1, i64 3
-  %ld0.3 = load double, double* %gep0.3, align 8
+  %gep1.3 = getelementptr inbounds double, ptr %arg, i64 7
+  %ld1.3 = load double, ptr %gep1.3, align 8
+  %gep0.3 = getelementptr inbounds double, ptr %arg1, i64 3
+  %ld0.3 = load double, ptr %gep0.3, align 8
   %mul1.3 = fmul fast double %ld0.3, %ld1.3
   %rdx1.2 = fadd fast double %rdx1.1, %mul1.3
-  %gep2.3 = getelementptr inbounds double, double* %arg1, i64 19
-  %ld2.3 = load double, double* %gep2.3, align 8
+  %gep2.3 = getelementptr inbounds double, ptr %arg1, i64 19
+  %ld2.3 = load double, ptr %gep2.3, align 8
   %mul2.3 = fmul fast double %ld2.3, %ld1.3
   %rdx2.2 = fadd fast double %rdx2.1, %mul2.3
-  %gep1.4 = getelementptr inbounds double, double* %arg, i64 9
-  %ld1.4 = load double, double* %gep1.4, align 8
-  %gep0.4 = getelementptr inbounds double, double* %arg1, i64 4
-  %ld0.4 = load double, double* %gep0.4, align 8
+  %gep1.4 = getelementptr inbounds double, ptr %arg, i64 9
+  %ld1.4 = load double, ptr %gep1.4, align 8
+  %gep0.4 = getelementptr inbounds double, ptr %arg1, i64 4
+  %ld0.4 = load double, ptr %gep0.4, align 8
   %mul1.4 = fmul fast double %ld0.4, %ld1.4
   %rdx1.3 = fadd fast double %rdx1.2, %mul1.4
-  %gep2.4 = getelementptr inbounds double, double* %arg1, i64 20
-  %ld2.4 = load double, double* %gep2.4, align 8
+  %gep2.4 = getelementptr inbounds double, ptr %arg1, i64 20
+  %ld2.4 = load double, ptr %gep2.4, align 8
   %mul2.4 = fmul fast double %ld2.4, %ld1.4
   %rdx2.3 = fadd fast double %rdx2.2, %mul2.4
-  %gep1.5 = getelementptr inbounds double, double* %arg, i64 11
-  %ld1.5 = load double, double* %gep1.5, align 8
-  %gep0.5 = getelementptr inbounds double, double* %arg1, i64 5
-  %ld0.5 = load double, double* %gep0.5, align 8
+  %gep1.5 = getelementptr inbounds double, ptr %arg, i64 11
+  %ld1.5 = load double, ptr %gep1.5, align 8
+  %gep0.5 = getelementptr inbounds double, ptr %arg1, i64 5
+  %ld0.5 = load double, ptr %gep0.5, align 8
   %mul1.5 = fmul fast double %ld0.5, %ld1.5
   %rdx1.4 = fadd fast double %rdx1.3, %mul1.5
-  %gep2.5 = getelementptr inbounds double, double* %arg1, i64 21
-  %ld2.5 = load double, double* %gep2.5, align 8
+  %gep2.5 = getelementptr inbounds double, ptr %arg1, i64 21
+  %ld2.5 = load double, ptr %gep2.5, align 8
   %mul2.5 = fmul fast double %ld2.5, %ld1.5
   %rdx2.4 = fadd fast double %rdx2.3, %mul2.5
-  %gep1.6 = getelementptr inbounds double, double* %arg, i64 13
-  %ld1.6 = load double, double* %gep1.6, align 8
-  %gep0.6 = getelementptr inbounds double, double* %arg1, i64 6
-  %ld0.6 = load double, double* %gep0.6, align 8
+  %gep1.6 = getelementptr inbounds double, ptr %arg, i64 13
+  %ld1.6 = load double, ptr %gep1.6, align 8
+  %gep0.6 = getelementptr inbounds double, ptr %arg1, i64 6
+  %ld0.6 = load double, ptr %gep0.6, align 8
   %mul1.6 = fmul fast double %ld0.6, %ld1.6
   %rdx1.5 = fadd fast double %rdx1.4, %mul1.6
-  %gep2.6 = getelementptr inbounds double, double* %arg1, i64 22
-  %ld2.6 = load double, double* %gep2.6, align 8
+  %gep2.6 = getelementptr inbounds double, ptr %arg1, i64 22
+  %ld2.6 = load double, ptr %gep2.6, align 8
   %mul2.6 = fmul fast double %ld2.6, %ld1.6
   %rdx2.5 = fadd fast double %rdx2.4, %mul2.6
-  %gep1.7 = getelementptr inbounds double, double* %arg, i64 15
-  %ld1.7 = load double, double* %gep1.7, align 8
-  %gep0.7 = getelementptr inbounds double, double* %arg1, i64 7
-  %ld0.7 = load double, double* %gep0.7, align 8
+  %gep1.7 = getelementptr inbounds double, ptr %arg, i64 15
+  %ld1.7 = load double, ptr %gep1.7, align 8
+  %gep0.7 = getelementptr inbounds double, ptr %arg1, i64 7
+  %ld0.7 = load double, ptr %gep0.7, align 8
   %mul1.7 = fmul fast double %ld0.7, %ld1.7
   %rdx1 = fadd fast double %rdx1.5, %mul1.7
-  %gep2.7 = getelementptr inbounds double, double* %arg1, i64 23
-  %ld2.7 = load double, double* %gep2.7, align 8
+  %gep2.7 = getelementptr inbounds double, ptr %arg1, i64 23
+  %ld2.7 = load double, ptr %gep2.7, align 8
   %mul2.7 = fmul fast double %ld2.7, %ld1.7
   %rdx2 = fadd fast double %rdx2.5, %mul2.7
   %i142 = insertelement <2 x double> poison, double %rdx1, i64 0
   %i143 = insertelement <2 x double> %i142, double %rdx2, i64 1
-  %p = getelementptr inbounds double, double* %arg2, <2 x i64> <i64 0, i64 16>
-  call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %i143, <2 x double*> %p, i32 8, <2 x i1> <i1 true, i1 true>)
+  %p = getelementptr inbounds double, ptr %arg2, <2 x i64> <i64 0, i64 16>
+  call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> %i143, <2 x ptr> %p, i32 8, <2 x i1> <i1 true, i1 true>)
   ret void
 }

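The masked gather/scatter lines above also show how intrinsic name mangling
changes with the pointer conversion: the overloaded pointer suffix used to
encode the pointee type, while opaque pointers leave only the address space.
A minimal sketch of the two spellings as standalone declarations (reduced
from the hunks above and below, not a new API):

  ; typed pointers: v2p0f64 means <2 x double*> in address space 0
  declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double>, <2 x double*>, i32 immarg, <2 x i1>)
  ; opaque pointers: v2p0 means <2 x ptr> in address space 0
  declare void @llvm.masked.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, i32 immarg, <2 x i1>)
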
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-insertelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-insertelement.ll
index 3a90639a43305..19d0bc9b33065 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-insertelement.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-insertelement.ll
@@ -1,54 +1,53 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -mtriple=x86_64 -passes=slp-vectorizer -S -mcpu=skylake-avx512 | FileCheck %s
 
-declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double>, <2 x double*>, i32 immarg, <2 x i1>)
+declare void @llvm.masked.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, i32 immarg, <2 x i1>)
 
-define void @rdx_feeds_single_insert(<2 x double> %v, double* nocapture readonly %arg, double* nocapture readonly %arg1, double* nocapture %arg2) {
+define void @rdx_feeds_single_insert(<2 x double> %v, ptr nocapture readonly %arg, ptr nocapture readonly %arg1, ptr nocapture %arg2) {
 ; CHECK-LABEL: @rdx_feeds_single_insert(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARG1:%.*]] to <8 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr [[ARG1:%.*]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <8 x double> [[TMP1]], <double 1.000000e+01, double 1.100000e+01, double 1.200000e+01, double 1.300000e+01, double 1.400000e+01, double 1.500000e+01, double 1.600000e+01, double 1.700000e+01>
 ; CHECK-NEXT:    [[TMP3:%.*]] = call fast double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> [[TMP2]])
 ; CHECK-NEXT:    [[I:%.*]] = insertelement <2 x double> [[V:%.*]], double [[TMP3]], i64 1
-; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds double, double* [[ARG2:%.*]], <2 x i64> <i64 0, i64 16>
-; CHECK-NEXT:    call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> [[I]], <2 x double*> [[P]], i32 8, <2 x i1> <i1 true, i1 true>)
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds double, ptr [[ARG2:%.*]], <2 x i64> <i64 0, i64 16>
+; CHECK-NEXT:    call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> [[I]], <2 x ptr> [[P]], i32 8, <2 x i1> <i1 true, i1 true>)
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %ld0.0 = load double, double* %arg1, align 8
+  %ld0.0 = load double, ptr %arg1, align 8
   %mul1.0 = fmul fast double %ld0.0, 10.0
-  %gep0.1 = getelementptr inbounds double, double* %arg1, i64 1
-  %ld0.1 = load double, double* %gep0.1, align 8
+  %gep0.1 = getelementptr inbounds double, ptr %arg1, i64 1
+  %ld0.1 = load double, ptr %gep0.1, align 8
   %mul1.1 = fmul fast double %ld0.1, 11.0
   %rdx1.0 = fadd fast double %mul1.0, %mul1.1
-  %gep0.2 = getelementptr inbounds double, double* %arg1, i64 2
-  %ld0.2 = load double, double* %gep0.2, align 8
+  %gep0.2 = getelementptr inbounds double, ptr %arg1, i64 2
+  %ld0.2 = load double, ptr %gep0.2, align 8
   %mul1.2 = fmul fast double %ld0.2, 12.0
   %rdx1.1 = fadd fast double %rdx1.0, %mul1.2
-  %gep0.3 = getelementptr inbounds double, double* %arg1, i64 3
-  %ld0.3 = load double, double* %gep0.3, align 8
+  %gep0.3 = getelementptr inbounds double, ptr %arg1, i64 3
+  %ld0.3 = load double, ptr %gep0.3, align 8
   %mul1.3 = fmul fast double %ld0.3, 13.0
   %rdx1.2 = fadd fast double %rdx1.1, %mul1.3
-  %gep0.4 = getelementptr inbounds double, double* %arg1, i64 4
-  %ld0.4 = load double, double* %gep0.4, align 8
+  %gep0.4 = getelementptr inbounds double, ptr %arg1, i64 4
+  %ld0.4 = load double, ptr %gep0.4, align 8
   %mul1.4 = fmul fast double %ld0.4, 14.0
   %rdx1.3 = fadd fast double %rdx1.2, %mul1.4
-  %gep0.5 = getelementptr inbounds double, double* %arg1, i64 5
-  %ld0.5 = load double, double* %gep0.5, align 8
+  %gep0.5 = getelementptr inbounds double, ptr %arg1, i64 5
+  %ld0.5 = load double, ptr %gep0.5, align 8
   %mul1.5 = fmul fast double %ld0.5, 15.0
   %rdx1.4 = fadd fast double %rdx1.3, %mul1.5
-  %gep0.6 = getelementptr inbounds double, double* %arg1, i64 6
-  %ld0.6 = load double, double* %gep0.6, align 8
+  %gep0.6 = getelementptr inbounds double, ptr %arg1, i64 6
+  %ld0.6 = load double, ptr %gep0.6, align 8
   %mul1.6 = fmul fast double %ld0.6, 16.0
   %rdx1.5 = fadd fast double %rdx1.4, %mul1.6
-  %gep0.7 = getelementptr inbounds double, double* %arg1, i64 7
-  %ld0.7 = load double, double* %gep0.7, align 8
+  %gep0.7 = getelementptr inbounds double, ptr %arg1, i64 7
+  %ld0.7 = load double, ptr %gep0.7, align 8
   %mul1.7 = fmul fast double %ld0.7, 17.0
   %rdx1 = fadd fast double %rdx1.5, %mul1.7
   %i = insertelement <2 x double> %v, double %rdx1, i64 1
-  %p = getelementptr inbounds double, double* %arg2, <2 x i64> <i64 0, i64 16>
-  call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %i, <2 x double*> %p, i32 8, <2 x i1> <i1 true, i1 true>)
+  %p = getelementptr inbounds double, ptr %arg2, <2 x i64> <i64 0, i64 16>
+  call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> %i, <2 x ptr> %p, i32 8, <2 x i1> <i1 true, i1 true>)
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
index 873948c9596f5..e8ca9aeb8147c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
@@ -2,17 +2,15 @@
 ; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -mattr=sse2 -passes=slp-vectorizer -pass-remarks-output=%t < %s -slp-threshold=-2 | FileCheck %s
 ; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
 
-define void @fextr(i16* %ptr) {
+define void @fextr(ptr %ptr) {
 ; CHECK-LABEL: @fextr(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[LD:%.*]] = load <8 x i16>, <8 x i16>* undef, align 16
+; CHECK-NEXT:    [[LD:%.*]] = load <8 x i16>, ptr undef, align 16
 ; CHECK-NEXT:    br label [[T:%.*]]
 ; CHECK:       t:
-; CHECK-NEXT:    [[P0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>*
-; CHECK-NEXT:    store <8 x i16> [[TMP0]], <8 x i16>* [[TMP1]], align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP0]], ptr [[PTR:%.*]], align 2
 ; CHECK-NEXT:    ret void
 ;
 ; YAML:      Pass:            slp-vectorizer
@@ -25,7 +23,7 @@ define void @fextr(i16* %ptr) {
 ; YAML-NEXT:   - TreeSize:        '4'
 
 entry:
-  %LD = load <8 x i16>, <8 x i16>* undef
+  %LD = load <8 x i16>, ptr undef
   %V0 = extractelement <8 x i16> %LD, i32 0
   br label %t
 
@@ -37,14 +35,13 @@ t:
   %V5 = extractelement <8 x i16> %LD, i32 5
   %V6 = extractelement <8 x i16> %LD, i32 6
   %V7 = extractelement <8 x i16> %LD, i32 7
-  %P0 = getelementptr inbounds i16, i16* %ptr, i64 0
-  %P1 = getelementptr inbounds i16, i16* %ptr, i64 1
-  %P2 = getelementptr inbounds i16, i16* %ptr, i64 2
-  %P3 = getelementptr inbounds i16, i16* %ptr, i64 3
-  %P4 = getelementptr inbounds i16, i16* %ptr, i64 4
-  %P5 = getelementptr inbounds i16, i16* %ptr, i64 5
-  %P6 = getelementptr inbounds i16, i16* %ptr, i64 6
-  %P7 = getelementptr inbounds i16, i16* %ptr, i64 7
+  %P1 = getelementptr inbounds i16, ptr %ptr, i64 1
+  %P2 = getelementptr inbounds i16, ptr %ptr, i64 2
+  %P3 = getelementptr inbounds i16, ptr %ptr, i64 3
+  %P4 = getelementptr inbounds i16, ptr %ptr, i64 4
+  %P5 = getelementptr inbounds i16, ptr %ptr, i64 5
+  %P6 = getelementptr inbounds i16, ptr %ptr, i64 6
+  %P7 = getelementptr inbounds i16, ptr %ptr, i64 7
   %A0 = add i16 %V0, %V0
   %A1 = add i16 %V1, undef
   %A2 = add i16 %V2, %V0
@@ -53,13 +50,13 @@ t:
   %A5 = add i16 %V5, %V0
   %A6 = add i16 %V6, %V0
   %A7 = add i16 %V7, %V0
-  store i16 %A0, i16* %P0, align 2
-  store i16 %A1, i16* %P1, align 2
-  store i16 %A2, i16* %P2, align 2
-  store i16 %A3, i16* %P3, align 2
-  store i16 %A4, i16* %P4, align 2
-  store i16 %A5, i16* %P5, align 2
-  store i16 %A6, i16* %P6, align 2
-  store i16 %A7, i16* %P7, align 2
+  store i16 %A0, ptr %ptr, align 2
+  store i16 %A1, ptr %P1, align 2
+  store i16 %A2, ptr %P2, align 2
+  store i16 %A3, ptr %P3, align 2
+  store i16 %A4, ptr %P4, align 2
+  store i16 %A5, ptr %P5, align 2
+  store i16 %A6, ptr %P6, align 2
+  store i16 %A7, ptr %P7, align 2
   ret void
 }

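The dropped %P0 above is a recurring pattern in this conversion: a
getelementptr whose indices are all zero yields exactly its base pointer, so
the updated test stores through %ptr directly. Schematically, with the names
taken from the hunk above:

  %P0 = getelementptr inbounds i16, ptr %ptr, i64 0 ; same address as %ptr
  store i16 %A0, ptr %P0, align 2
  ; folds to:
  store i16 %A0, ptr %ptr, align 2
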
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
index a0f776886ad4b..d79c08763be8a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll
@@ -2,27 +2,23 @@
 ; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -passes=slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s
 ; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
 
-define i32 @foo(i32* %diff) #0 {
+define i32 @foo(ptr %diff) #0 {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[M2:%.*]] = alloca [8 x [8 x i32]], align 16
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast [8 x [8 x i32]]* [[M2]] to i8*
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[A_088:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDVARS_IV]], 3
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DIFF:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DIFF:%.*]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[TMP1]], 4
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP2]]
-; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[ARRAYIDX2]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[DIFF]], i64 [[TMP2]]
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [8 x [8 x i32]], ptr [[M2]], i64 0, i64 [[INDVARS_IV]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP4]]
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32* [[ARRAYIDX6]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 16
+; CHECK-NEXT:    store <4 x i32> [[TMP7]], ptr [[ARRAYIDX6]], align 16
 ; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
 ; CHECK-NEXT:    [[OP_RDX]] = add i32 [[TMP9]], [[A_088]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -33,55 +29,54 @@ define i32 @foo(i32* %diff) #0 {
 ;
 entry:
   %m2 = alloca [8 x [8 x i32]], align 16
-  %0 = bitcast [8 x [8 x i32]]* %m2 to i8*
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %a.088 = phi i32 [ 0, %entry ], [ %add52, %for.body ]
-  %1 = shl i64 %indvars.iv, 3
-  %arrayidx = getelementptr inbounds i32, i32* %diff, i64 %1
-  %2 = load i32, i32* %arrayidx, align 4
-  %3 = or i64 %1, 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %diff, i64 %3
-  %4 = load i32, i32* %arrayidx2, align 4
-  %add3 = add nsw i32 %4, %2
-  %arrayidx6 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 0
-  store i32 %add3, i32* %arrayidx6, align 16
+  %0 = shl i64 %indvars.iv, 3
+  %arrayidx = getelementptr inbounds i32, ptr %diff, i64 %0
+  %1 = load i32, ptr %arrayidx, align 4
+  %2 = or i64 %0, 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %diff, i64 %2
+  %3 = load i32, ptr %arrayidx2, align 4
+  %add3 = add nsw i32 %3, %1
+  %arrayidx6 = getelementptr inbounds [8 x [8 x i32]], ptr %m2, i64 0, i64 %indvars.iv, i64 0
+  store i32 %add3, ptr %arrayidx6, align 16
 
   %add10 = add nsw i32 %add3, %a.088
-  %5 = or i64 %1, 1
-  %arrayidx13 = getelementptr inbounds i32, i32* %diff, i64 %5
-  %6 = load i32, i32* %arrayidx13, align 4
-  %7 = or i64 %1, 5
-  %arrayidx16 = getelementptr inbounds i32, i32* %diff, i64 %7
-  %8 = load i32, i32* %arrayidx16, align 4
-  %add17 = add nsw i32 %8, %6
-  %arrayidx20 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 1
-  store i32 %add17, i32* %arrayidx20, align 4
+  %4 = or i64 %0, 1
+  %arrayidx13 = getelementptr inbounds i32, ptr %diff, i64 %4
+  %5 = load i32, ptr %arrayidx13, align 4
+  %6 = or i64 %0, 5
+  %arrayidx16 = getelementptr inbounds i32, ptr %diff, i64 %6
+  %7 = load i32, ptr %arrayidx16, align 4
+  %add17 = add nsw i32 %7, %5
+  %arrayidx20 = getelementptr inbounds [8 x [8 x i32]], ptr %m2, i64 0, i64 %indvars.iv, i64 1
+  store i32 %add17, ptr %arrayidx20, align 4
 
   %add24 = add nsw i32 %add10, %add17
-  %9 = or i64 %1, 2
-  %arrayidx27 = getelementptr inbounds i32, i32* %diff, i64 %9
-  %10 = load i32, i32* %arrayidx27, align 4
-  %11 = or i64 %1, 6
-  %arrayidx30 = getelementptr inbounds i32, i32* %diff, i64 %11
-  %12 = load i32, i32* %arrayidx30, align 4
-  %add31 = add nsw i32 %12, %10
-  %arrayidx34 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 2
-  store i32 %add31, i32* %arrayidx34, align 8
+  %8 = or i64 %0, 2
+  %arrayidx27 = getelementptr inbounds i32, ptr %diff, i64 %8
+  %9 = load i32, ptr %arrayidx27, align 4
+  %10 = or i64 %0, 6
+  %arrayidx30 = getelementptr inbounds i32, ptr %diff, i64 %10
+  %11 = load i32, ptr %arrayidx30, align 4
+  %add31 = add nsw i32 %11, %9
+  %arrayidx34 = getelementptr inbounds [8 x [8 x i32]], ptr %m2, i64 0, i64 %indvars.iv, i64 2
+  store i32 %add31, ptr %arrayidx34, align 8
 
   %add38 = add nsw i32 %add24, %add31
-  %13 = or i64 %1, 3
-  %arrayidx41 = getelementptr inbounds i32, i32* %diff, i64 %13
-  %14 = load i32, i32* %arrayidx41, align 4
-  %15 = or i64 %1, 7
-  %arrayidx44 = getelementptr inbounds i32, i32* %diff, i64 %15
-  %16 = load i32, i32* %arrayidx44, align 4
+  %12 = or i64 %0, 3
+  %arrayidx41 = getelementptr inbounds i32, ptr %diff, i64 %12
+  %13 = load i32, ptr %arrayidx41, align 4
+  %14 = or i64 %0, 7
+  %arrayidx44 = getelementptr inbounds i32, ptr %diff, i64 %14
+  %15 = load i32, ptr %arrayidx44, align 4
 
-  %add45 = add nsw i32 %16, %14
-  %arrayidx48 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 3
-  store i32 %add45, i32* %arrayidx48, align 4
+  %add45 = add nsw i32 %15, %13
+  %arrayidx48 = getelementptr inbounds [8 x [8 x i32]], ptr %m2, i64 0, i64 %indvars.iv, i64 3
+  store i32 %add45, ptr %arrayidx48, align 4
 
   %add52 = add nsw i32 %add38, %add45
 

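A note on why every unnamed value in this test is renumbered: under typed
pointers the alloca needed a dead bitcast to i8* that consumed %0, and
deleting that cast shifts each later %N down by one. Reduced to the relevant
lines (taken from the hunks above):

  %m2 = alloca [8 x [8 x i32]], align 16
  %0 = bitcast [8 x [8 x i32]]* %m2 to i8*  ; typed pointers only
  ; with opaque pointers the alloca already produces a plain ptr, the cast
  ; disappears, and the old %1 becomes %0, %2 becomes %1, and so on.
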
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_listcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_listcost.ll
index 4c430986dba28..2cd7adaad969f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_listcost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_listcost.ll
@@ -2,21 +2,21 @@
 ; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -passes=slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s
 ; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
 
-define void @vsub2_test(i32* %pin1, i32* %pin2, i32* %pout) #0 {
+define void @vsub2_test(ptr %pin1, ptr %pin2, ptr %pout) #0 {
 ; CHECK-LABEL: @vsub2_test(
 ; CHECK-NEXT:    br label [[TMP1:%.*]]
 ; CHECK:       1:
 ; CHECK-NEXT:    [[IDX_04:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[TMP1]] ]
-; CHECK-NEXT:    [[PO_03:%.*]] = phi i32* [ [[POUT:%.*]], [[TMP0]] ], [ [[TMP7:%.*]], [[TMP1]] ]
-; CHECK-NEXT:    [[PTMPI2_02:%.*]] = phi i32* [ [[PIN2:%.*]], [[TMP0]] ], [ [[TMP4:%.*]], [[TMP1]] ]
-; CHECK-NEXT:    [[PTMPI1_01:%.*]] = phi i32* [ [[PIN1:%.*]], [[TMP0]] ], [ [[TMP2:%.*]], [[TMP1]] ]
-; CHECK-NEXT:    [[TMP2]] = getelementptr inbounds i32, i32* [[PTMPI1_01]], i64 1
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[PTMPI1_01]], align 4, !tbaa [[TBAA1:![0-9]+]]
-; CHECK-NEXT:    [[TMP4]] = getelementptr inbounds i32, i32* [[PTMPI2_02]], i64 1
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[PTMPI2_02]], align 4, !tbaa [[TBAA1]]
+; CHECK-NEXT:    [[PO_03:%.*]] = phi ptr [ [[POUT:%.*]], [[TMP0]] ], [ [[TMP7:%.*]], [[TMP1]] ]
+; CHECK-NEXT:    [[PTMPI2_02:%.*]] = phi ptr [ [[PIN2:%.*]], [[TMP0]] ], [ [[TMP4:%.*]], [[TMP1]] ]
+; CHECK-NEXT:    [[PTMPI1_01:%.*]] = phi ptr [ [[PIN1:%.*]], [[TMP0]] ], [ [[TMP2:%.*]], [[TMP1]] ]
+; CHECK-NEXT:    [[TMP2]] = getelementptr inbounds i32, ptr [[PTMPI1_01]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[PTMPI1_01]], align 4, !tbaa [[TBAA1:![0-9]+]]
+; CHECK-NEXT:    [[TMP4]] = getelementptr inbounds i32, ptr [[PTMPI2_02]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[PTMPI2_02]], align 4, !tbaa [[TBAA1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw i32 [[TMP3]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7]] = getelementptr inbounds i32, i32* [[PO_03]], i64 1
-; CHECK-NEXT:    store i32 [[TMP6]], i32* [[PO_03]], align 4, !tbaa [[TBAA1]]
+; CHECK-NEXT:    [[TMP7]] = getelementptr inbounds i32, ptr [[PO_03]], i64 1
+; CHECK-NEXT:    store i32 [[TMP6]], ptr [[PO_03]], align 4, !tbaa [[TBAA1]]
 ; CHECK-NEXT:    [[TMP8]] = add nuw nsw i32 [[IDX_04]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[TMP8]], 64
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[TMP9:%.*]], label [[TMP1]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -26,15 +26,15 @@ define void @vsub2_test(i32* %pin1, i32* %pin2, i32* %pout) #0 {
   br label %1
 
   %idx.04 = phi i32 [ 0, %0 ], [ %8, %1 ]
-  %po.03 = phi i32* [ %pout, %0 ], [ %7, %1 ]
-  %ptmpi2.02 = phi i32* [ %pin2, %0 ], [ %4, %1 ]
-  %ptmpi1.01 = phi i32* [ %pin1, %0 ], [ %2, %1 ]
-  %2 = getelementptr inbounds i32, i32* %ptmpi1.01, i64 1
-  %3 = load i32, i32* %ptmpi1.01, align 4, !tbaa !1
-  %4 = getelementptr inbounds i32, i32* %ptmpi2.02, i64 1
-  %5 = load i32, i32* %ptmpi2.02, align 4, !tbaa !1
+  %po.03 = phi ptr [ %pout, %0 ], [ %7, %1 ]
+  %ptmpi2.02 = phi ptr [ %pin2, %0 ], [ %4, %1 ]
+  %ptmpi1.01 = phi ptr [ %pin1, %0 ], [ %2, %1 ]
+  %2 = getelementptr inbounds i32, ptr %ptmpi1.01, i64 1
+  %3 = load i32, ptr %ptmpi1.01, align 4, !tbaa !1
+  %4 = getelementptr inbounds i32, ptr %ptmpi2.02, i64 1
+  %5 = load i32, ptr %ptmpi2.02, align 4, !tbaa !1
   %6 = sub nsw i32 %3, %5
-  %7 = getelementptr inbounds i32, i32* %po.03, i64 1
+  %7 = getelementptr inbounds i32, ptr %po.03, i64 1
   ; YAML:      Pass:            slp-vectorizer
   ; YAML-NEXT: Name:            NotBeneficial
   ; YAML-NEXT: Function:        vsub2_test
@@ -43,7 +43,7 @@ define void @vsub2_test(i32* %pin1, i32* %pin2, i32* %pout) #0 {
   ; YAML-NEXT:   - Cost:            '0'
   ; YAML-NEXT:   - String:          ' >= '
   ; YAML-NEXT:   - Treshold:        '0'
-  store i32 %6, i32* %po.03, align 4, !tbaa !1
+  store i32 %6, ptr %po.03, align 4, !tbaa !1
   %8 = add nuw nsw i32 %idx.04, 1
   %exitcond = icmp eq i32 %8, 64
   br i1 %exitcond, label %9, label %1, !llvm.loop !5

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll
index 1a10bd481690f..21a8654ef03b0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_not_all_parts.ll
@@ -2,65 +2,59 @@
 ; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -passes=slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s
 ; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
 
-define i32 @foo(i32* nocapture readonly %diff) #0 {
+define i32 @foo(ptr nocapture readonly %diff) #0 {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[M2:%.*]] = alloca [8 x [8 x i32]], align 16
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast [8 x [8 x i32]]* [[M2]] to i8*
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[A_088:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD24:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDVARS_IV]], 3
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DIFF:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DIFF:%.*]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[TMP1]], 4
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP2]]
-; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[ARRAYIDX]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[ARRAYIDX2]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[TMP5]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[DIFF]], i64 [[TMP2]]
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [8 x [8 x i32]], ptr [[M2]], i64 0, i64 [[INDVARS_IV]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <2 x i32> [[TMP6]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i32> [[TMP7]], i32 0
 ; CHECK-NEXT:    [[ADD10:%.*]] = add nsw i32 [[TMP8]], [[A_088]]
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32* [[ARRAYIDX6]] to <2 x i32>*
-; CHECK-NEXT:    store <2 x i32> [[TMP7]], <2 x i32>* [[TMP9]], align 16
+; CHECK-NEXT:    store <2 x i32> [[TMP7]], ptr [[ARRAYIDX6]], align 16
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i32> [[TMP7]], i32 1
 ; CHECK-NEXT:    [[ADD24]] = add nsw i32 [[ADD10]], [[TMP10]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 0
 ; CHECK-NEXT:    ret i32 [[ADD24]]
 ;
 entry:
   %m2 = alloca [8 x [8 x i32]], align 16
-  %0 = bitcast [8 x [8 x i32]]* %m2 to i8*
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %a.088 = phi i32 [ 0, %entry ], [ %add24, %for.body ]
-  %1 = shl i64 %indvars.iv, 3
-  %arrayidx = getelementptr inbounds i32, i32* %diff, i64 %1
-  %2 = load i32, i32* %arrayidx, align 4
-  %3 = or i64 %1, 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %diff, i64 %3
-  %4 = load i32, i32* %arrayidx2, align 4
-  %add3 = add nsw i32 %4, %2
-  %arrayidx6 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 0
-  store i32 %add3, i32* %arrayidx6, align 16
+  %0 = shl i64 %indvars.iv, 3
+  %arrayidx = getelementptr inbounds i32, ptr %diff, i64 %0
+  %1 = load i32, ptr %arrayidx, align 4
+  %2 = or i64 %0, 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %diff, i64 %2
+  %3 = load i32, ptr %arrayidx2, align 4
+  %add3 = add nsw i32 %3, %1
+  %arrayidx6 = getelementptr inbounds [8 x [8 x i32]], ptr %m2, i64 0, i64 %indvars.iv, i64 0
+  store i32 %add3, ptr %arrayidx6, align 16
   %add10 = add nsw i32 %add3, %a.088
-  %5 = or i64 %1, 1
-  %arrayidx13 = getelementptr inbounds i32, i32* %diff, i64 %5
-  %6 = load i32, i32* %arrayidx13, align 4
-  %7 = or i64 %1, 5
-  %arrayidx16 = getelementptr inbounds i32, i32* %diff, i64 %7
-  %8 = load i32, i32* %arrayidx16, align 4
-  %add17 = add nsw i32 %8, %6
-  %arrayidx20 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 1
-  store i32 %add17, i32* %arrayidx20, align 4
+  %4 = or i64 %0, 1
+  %arrayidx13 = getelementptr inbounds i32, ptr %diff, i64 %4
+  %5 = load i32, ptr %arrayidx13, align 4
+  %6 = or i64 %0, 5
+  %arrayidx16 = getelementptr inbounds i32, ptr %diff, i64 %6
+  %7 = load i32, ptr %arrayidx16, align 4
+  %add17 = add nsw i32 %7, %5
+  %arrayidx20 = getelementptr inbounds [8 x [8 x i32]], ptr %m2, i64 0, i64 %indvars.iv, i64 1
+  store i32 %add17, ptr %arrayidx20, align 4
   %add24 = add nsw i32 %add10, %add17
 
   ; YAML:      Pass:            slp-vectorizer
@@ -77,7 +71,6 @@ for.body:                                         ; preds = %for.body, %entry
   br i1 %exitcond, label %for.end, label %for.body
 
 for.end:                                          ; preds = %for.body
-  %arraydecay = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 0
   ret i32 %add24
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_unsupported.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_unsupported.ll
index 10b4ee1ba2c98..540ea4eb659fe 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_unsupported.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_unsupported.ll
@@ -3,18 +3,17 @@
 ; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
 
 ; This type is not supported by SLP
-define i1 @test(x86_fp80* %i1, x86_fp80* %i2) {
+define i1 @test(ptr %i1, ptr %i2) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[I1_0:%.*]] = load x86_fp80, x86_fp80* [[I1:%.*]], align 16
-; CHECK-NEXT:    [[I1_GEP1:%.*]] = getelementptr x86_fp80, x86_fp80* [[I1]], i64 1
-; CHECK-NEXT:    [[I1_1:%.*]] = load x86_fp80, x86_fp80* [[I1_GEP1]], align 16
+; CHECK-NEXT:    [[I1_0:%.*]] = load x86_fp80, ptr [[I1:%.*]], align 16
+; CHECK-NEXT:    [[I1_GEP1:%.*]] = getelementptr x86_fp80, ptr [[I1]], i64 1
+; CHECK-NEXT:    [[I1_1:%.*]] = load x86_fp80, ptr [[I1_GEP1]], align 16
 ; CHECK-NEXT:    br i1 undef, label [[THEN:%.*]], label [[END:%.*]]
 ; CHECK:       then:
-; CHECK-NEXT:    [[I2_GEP0:%.*]] = getelementptr inbounds x86_fp80, x86_fp80* [[I2:%.*]], i64 0
-; CHECK-NEXT:    [[I2_0:%.*]] = load x86_fp80, x86_fp80* [[I2_GEP0]], align 16
-; CHECK-NEXT:    [[I2_GEP1:%.*]] = getelementptr inbounds x86_fp80, x86_fp80* [[I2]], i64 1
-; CHECK-NEXT:    [[I2_1:%.*]] = load x86_fp80, x86_fp80* [[I2_GEP1]], align 16
+; CHECK-NEXT:    [[I2_0:%.*]] = load x86_fp80, ptr [[I2:%.*]], align 16
+; CHECK-NEXT:    [[I2_GEP1:%.*]] = getelementptr inbounds x86_fp80, ptr [[I2]], i64 1
+; CHECK-NEXT:    [[I2_1:%.*]] = load x86_fp80, ptr [[I2_GEP1]], align 16
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
 ; CHECK-NEXT:    [[PHI0:%.*]] = phi x86_fp80 [ [[I1_0]], [[ENTRY:%.*]] ], [ [[I2_0]], [[THEN]] ]
@@ -23,15 +22,14 @@ define i1 @test(x86_fp80* %i1, x86_fp80* %i2) {
 ; CHECK-NEXT:    ret i1 [[RES]]
 ;
 entry:
-  %i1.0 = load x86_fp80, x86_fp80* %i1, align 16
-  %i1.gep1 = getelementptr x86_fp80, x86_fp80* %i1, i64 1
-  %i1.1 = load x86_fp80, x86_fp80* %i1.gep1, align 16
+  %i1.0 = load x86_fp80, ptr %i1, align 16
+  %i1.gep1 = getelementptr x86_fp80, ptr %i1, i64 1
+  %i1.1 = load x86_fp80, ptr %i1.gep1, align 16
   br i1 undef, label %then, label %end
 then:
-  %i2.gep0 = getelementptr inbounds x86_fp80, x86_fp80* %i2, i64 0
-  %i2.0 = load x86_fp80, x86_fp80* %i2.gep0, align 16
-  %i2.gep1 = getelementptr inbounds x86_fp80, x86_fp80* %i2, i64 1
-  %i2.1 = load x86_fp80, x86_fp80* %i2.gep1, align 16
+  %i2.0 = load x86_fp80, ptr %i2, align 16
+  %i2.gep1 = getelementptr inbounds x86_fp80, ptr %i2, i64 1
+  %i2.1 = load x86_fp80, ptr %i2.gep1, align 16
   br label %end
 end:
   %phi0 = phi x86_fp80 [ %i1.0, %entry ], [ %i2.0, %then ]

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll
index 3850cf0ed3ed8..b182df599ce16 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll
@@ -1,84 +1,83 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes=slp-vectorizer -mattr=+avx512f -mtriple=x86_64 -S < %s | FileCheck %s
 
-define void @test(float* noalias %0, float* %p) {
+define void @test(ptr noalias %0, ptr %p) {
 ; CHECK-LABEL: @test(
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x float*> poison, float* [[P:%.*]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x float*> [[TMP2]], <8 x float*> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr float, <8 x float*> [[TMP3]], <8 x i64> <i64 15, i64 4, i64 5, i64 0, i64 2, i64 6, i64 7, i64 8>
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP0:%.*]], i64 2
-; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP4]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison)
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x ptr> poison, ptr [[P:%.*]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x ptr> [[TMP2]], <8 x ptr> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr float, <8 x ptr> [[TMP3]], <8 x i64> <i64 15, i64 4, i64 5, i64 0, i64 2, i64 6, i64 7, i64 8>
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP0:%.*]], i64 2
+; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison)
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> poison, <16 x i32> <i32 4, i32 3, i32 0, i32 1, i32 2, i32 0, i32 1, i32 2, i32 0, i32 2, i32 5, i32 6, i32 7, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <16 x float> <float poison, float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, <16 x float> [[TMP8]], <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:    [[TMP10:%.*]] = fadd reassoc nsz arcp contract afn <16 x float> [[TMP7]], [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <16 x float> [[TMP10]], <16 x float> poison, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 9, i32 0, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP12:%.*]] = bitcast float* [[TMP5]] to <16 x float>*
-; CHECK-NEXT:    store <16 x float> [[TMP11]], <16 x float>* [[TMP12]], align 4
+; CHECK-NEXT:    store <16 x float> [[TMP11]], ptr [[TMP5]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %2 = getelementptr inbounds float, float* %p, i64 2
-  %3 = getelementptr inbounds float, float* %p, i64 4
-  %4 = load float, float* %3, align 4
-  %5 = getelementptr inbounds float, float* %p, i64 5
-  %6 = load float, float* %5, align 16
-  %7 = getelementptr inbounds float, float* %p, i64 15
-  %8 = load float, float* %7, align 4
+  %2 = getelementptr inbounds float, ptr %p, i64 2
+  %3 = getelementptr inbounds float, ptr %p, i64 4
+  %4 = load float, ptr %3, align 4
+  %5 = getelementptr inbounds float, ptr %p, i64 5
+  %6 = load float, ptr %5, align 16
+  %7 = getelementptr inbounds float, ptr %p, i64 15
+  %8 = load float, ptr %7, align 4
   %9 = fadd reassoc nsz arcp contract afn float %8, 0.000000e+00
-  %10 = getelementptr inbounds float, float* %0, i64 2
-  store float %9, float* %10, align 4
+  %10 = getelementptr inbounds float, ptr %0, i64 2
+  store float %9, ptr %10, align 4
   %11 = fadd reassoc nsz arcp contract afn float %4, 0.000000e+00
-  %12 = getelementptr inbounds float, float* %0, i64 3
-  store float %11, float* %12, align 4
+  %12 = getelementptr inbounds float, ptr %0, i64 3
+  store float %11, ptr %12, align 4
   %13 = fadd reassoc nsz arcp contract afn float %6, 0.000000e+00
-  %14 = getelementptr inbounds float, float* %0, i64 4
-  store float %13, float* %14, align 4
+  %14 = getelementptr inbounds float, ptr %0, i64 4
+  store float %13, ptr %14, align 4
   %15 = fadd reassoc nsz arcp contract afn float %8, 0.000000e+00
-  %16 = getelementptr inbounds float, float* %0, i64 5
-  store float %15, float* %16, align 4
+  %16 = getelementptr inbounds float, ptr %0, i64 5
+  store float %15, ptr %16, align 4
   %17 = fadd reassoc nsz arcp contract afn float %4, 0.000000e+00
-  %18 = load float, float* %p, align 16
-  %19 = getelementptr inbounds float, float* %0, i64 6
-  store float %17, float* %19, align 4
+  %18 = load float, ptr %p, align 16
+  %19 = getelementptr inbounds float, ptr %0, i64 6
+  store float %17, ptr %19, align 4
   %20 = fadd reassoc nsz arcp contract afn float %6, 0.000000e+00
-  %21 = getelementptr inbounds float, float* %0, i64 7
-  store float %20, float* %21, align 4
+  %21 = getelementptr inbounds float, ptr %0, i64 7
+  store float %20, ptr %21, align 4
   %22 = fadd reassoc nsz arcp contract afn float %8, 0.000000e+00
-  %23 = load float, float* %2, align 8
-  %24 = getelementptr inbounds float, float* %0, i64 8
-  store float %22, float* %24, align 4
+  %23 = load float, ptr %2, align 8
+  %24 = getelementptr inbounds float, ptr %0, i64 8
+  store float %22, ptr %24, align 4
   %25 = fadd reassoc nsz arcp contract afn float %4, %18
-  %26 = getelementptr inbounds float, float* %0, i64 9
-  store float %25, float* %26, align 4
+  %26 = getelementptr inbounds float, ptr %0, i64 9
+  store float %25, ptr %26, align 4
   %27 = fadd reassoc nsz arcp contract afn float %6, 0.000000e+00
-  %28 = getelementptr inbounds float, float* %0, i64 10
-  store float %27, float* %28, align 4
+  %28 = getelementptr inbounds float, ptr %0, i64 10
+  store float %27, ptr %28, align 4
   %29 = fadd reassoc nsz arcp contract afn float %8, %23
-  %30 = getelementptr inbounds float, float* %0, i64 11
-  store float %29, float* %30, align 4
-  %31 = getelementptr inbounds float, float* %p, i64 6
-  %32 = load float, float* %31, align 4
+  %30 = getelementptr inbounds float, ptr %0, i64 11
+  store float %29, ptr %30, align 4
+  %31 = getelementptr inbounds float, ptr %p, i64 6
+  %32 = load float, ptr %31, align 4
   %33 = fadd reassoc nsz arcp contract afn float %32, 0.000000e+00
-  %34 = getelementptr inbounds float, float* %0, i64 12
-  store float %33, float* %34, align 4
-  %35 = getelementptr inbounds float, float* %p, i64 7
-  %36 = load float, float* %35, align 8
+  %34 = getelementptr inbounds float, ptr %0, i64 12
+  store float %33, ptr %34, align 4
+  %35 = getelementptr inbounds float, ptr %p, i64 7
+  %36 = load float, ptr %35, align 8
   %37 = fadd reassoc nsz arcp contract afn float %36, 0.000000e+00
-  %38 = getelementptr inbounds float, float* %0, i64 13
-  store float %37, float* %38, align 4
-  %39 = getelementptr inbounds float, float* %p, i64 8
-  %40 = load float, float* %39, align 4
+  %38 = getelementptr inbounds float, ptr %0, i64 13
+  store float %37, ptr %38, align 4
+  %39 = getelementptr inbounds float, ptr %p, i64 8
+  %40 = load float, ptr %39, align 4
   %41 = fadd reassoc nsz arcp contract afn float %40, 0.000000e+00
-  %42 = getelementptr inbounds float, float* %0, i64 14
-  store float %41, float* %42, align 4
+  %42 = getelementptr inbounds float, ptr %0, i64 14
+  store float %41, ptr %42, align 4
   %43 = fadd reassoc nsz arcp contract afn float %32, 0.000000e+00
-  %44 = getelementptr inbounds float, float* %0, i64 15
-  store float %43, float* %44, align 4
+  %44 = getelementptr inbounds float, ptr %0, i64 15
+  store float %43, ptr %44, align 4
   %45 = fadd reassoc nsz arcp contract afn float %36, 0.000000e+00
-  %46 = getelementptr inbounds float, float* %0, i64 16
-  store float %45, float* %46, align 4
+  %46 = getelementptr inbounds float, ptr %0, i64 16
+  store float %45, ptr %46, align 4
   %47 = fadd reassoc nsz arcp contract afn float %40, 0.000000e+00
-  %48 = getelementptr inbounds float, float* %0, i64 17
-  store float %47, float* %48, align 4
+  %48 = getelementptr inbounds float, ptr %0, i64 17
+  store float %47, ptr %48, align 4
   ret void
 }

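The CHECK lines in this test repeat the other common simplification: a wide
load or store no longer needs a scalar-to-vector pointer bitcast, because any
ptr may feed a vector memory access directly. In sketch form (hypothetical
value names, pattern reduced from the hunk above):

  ; typed pointers
  %c = bitcast float* %p to <16 x float>*
  store <16 x float> %v, <16 x float>* %c, align 4
  ; opaque pointers
  store <16 x float> %v, ptr %p, align 4
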
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll
index 4aeb9a313831f..86b1e1a801e32 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll
@@ -3,10 +3,9 @@
 
 define void @test() {
 ; CHECK-LABEL: @test(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* undef, i64 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* undef, i64 0, i64 1, i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP1]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i8>, <4 x i8>* [[TMP3]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr undef, i64 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [4 x [4 x i32]], ptr undef, i64 0, i64 1, i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = shl nsw <4 x i32> [[TMP6]], zeroinitializer
@@ -18,30 +17,29 @@ define void @test() {
 ; CHECK-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP12]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP12]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP15]], <4 x i32>* [[TMP16]], align 16
+; CHECK-NEXT:    store <4 x i32> [[TMP15]], ptr [[TMP2]], align 16
 ; CHECK-NEXT:    ret void
 ;
-  %1 = getelementptr inbounds i8, i8* undef, i64 4
-  %2 = load i8, i8* %1, align 1
+  %1 = getelementptr inbounds i8, ptr undef, i64 4
+  %2 = load i8, ptr %1, align 1
   %3 = zext i8 %2 to i32
   %4 = sub nsw i32 0, %3
   %5 = shl nsw i32 %4, 0
   %6 = add nsw i32 %5, 0
-  %7 = getelementptr inbounds i8, i8* undef, i64 5
-  %8 = load i8, i8* %7, align 1
+  %7 = getelementptr inbounds i8, ptr undef, i64 5
+  %8 = load i8, ptr %7, align 1
   %9 = zext i8 %8 to i32
   %10 = sub nsw i32 0, %9
   %11 = shl nsw i32 %10, 0
   %12 = add nsw i32 %11, 0
-  %13 = getelementptr inbounds i8, i8* undef, i64 6
-  %14 = load i8, i8* %13, align 1
+  %13 = getelementptr inbounds i8, ptr undef, i64 6
+  %14 = load i8, ptr %13, align 1
   %15 = zext i8 %14 to i32
   %16 = sub nsw i32 0, %15
   %17 = shl nsw i32 %16, 0
   %18 = add nsw i32 %17, 0
-  %19 = getelementptr inbounds i8, i8* undef, i64 7
-  %20 = load i8, i8* %19, align 1
+  %19 = getelementptr inbounds i8, ptr undef, i64 7
+  %20 = load i8, ptr %19, align 1
   %21 = zext i8 %20 to i32
   %22 = sub nsw i32 0, %21
   %23 = shl nsw i32 %22, 0
@@ -51,16 +49,16 @@ define void @test() {
   %27 = add nsw i32 %24, %18
   %28 = sub nsw i32 %18, %24
   %29 = add nsw i32 0, %25
-  %30 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* undef, i64 0, i64 1, i64 0
-  store i32 %29, i32* %30, align 16
+  %30 = getelementptr inbounds [4 x [4 x i32]], ptr undef, i64 0, i64 1, i64 0
+  store i32 %29, ptr %30, align 16
   %31 = sub nsw i32 0, %27
-  %32 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* undef, i64 0, i64 1, i64 2
-  store i32 %31, i32* %32, align 8
+  %32 = getelementptr inbounds [4 x [4 x i32]], ptr undef, i64 0, i64 1, i64 2
+  store i32 %31, ptr %32, align 8
   %33 = add nsw i32 0, %26
-  %34 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* undef, i64 0, i64 1, i64 1
-  store i32 %33, i32* %34, align 4
+  %34 = getelementptr inbounds [4 x [4 x i32]], ptr undef, i64 0, i64 1, i64 1
+  store i32 %33, ptr %34, align 4
   %35 = sub nsw i32 0, %28
-  %36 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* undef, i64 0, i64 1, i64 3
-  store i32 %35, i32* %36, align 4
+  %36 = getelementptr inbounds [4 x [4 x i32]], ptr undef, i64 0, i64 1, i64 3
+  store i32 %35, ptr %36, align 4
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_phi.ll
index c43753d995a77..65229d8acd889 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_phi.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_phi.ll
@@ -3,7 +3,7 @@
 
 %struct.complex = type { float, float }
 
-define  void @foo (%struct.complex* %A, %struct.complex* %B, %struct.complex* %Result) {
+define  void @foo (ptr %A, ptr %B, ptr %Result) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 256, 0
@@ -11,13 +11,12 @@ define  void @foo (%struct.complex* %A, %struct.complex* %B, %struct.complex* %R
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP18:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY]] ], [ [[TMP17:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_COMPLEX:%.*]], %struct.complex* [[A:%.*]], i64 [[TMP1]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_COMPLEX]], %struct.complex* [[B:%.*]], i64 [[TMP1]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = load float, float* [[TMP4]], align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_COMPLEX]], %struct.complex* [[B]], i64 [[TMP1]], i32 1
-; CHECK-NEXT:    [[TMP7:%.*]] = load float, float* [[TMP6]], align 4
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast float* [[TMP3]] to <2 x float>*
-; CHECK-NEXT:    [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[TMP8]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_COMPLEX:%.*]], ptr [[A:%.*]], i64 [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_COMPLEX]], ptr [[B:%.*]], i64 [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_COMPLEX]], ptr [[B]], i64 [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = load float, ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = load <2 x float>, ptr [[TMP3]], align 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP11:%.*]] = fmul <2 x float> [[TMP9]], [[SHUFFLE]]
@@ -33,9 +32,7 @@ define  void @foo (%struct.complex* %A, %struct.complex* %B, %struct.complex* %R
 ; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[TMP18]], [[TMP0]]
 ; CHECK-NEXT:    br i1 [[TMP19]], label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_COMPLEX]], %struct.complex* [[RESULT:%.*]], i32 0, i32 0
-; CHECK-NEXT:    [[TMP21:%.*]] = bitcast float* [[TMP20]] to <2 x float>*
-; CHECK-NEXT:    store <2 x float> [[TMP17]], <2 x float>* [[TMP21]], align 4
+; CHECK-NEXT:    store <2 x float> [[TMP17]], ptr [[RESULT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -46,14 +43,14 @@ loop:
   %1 = phi i64 [ 0, %entry ], [ %20, %loop ]
   %2 = phi float [ 0.000000e+00, %entry ], [ %19, %loop ]
   %3 = phi float [ 0.000000e+00, %entry ], [ %18, %loop ]
-  %4 = getelementptr inbounds %"struct.complex", %"struct.complex"* %A, i64 %1, i32 0
-  %5 = load float, float* %4, align 4
-  %6 = getelementptr inbounds %"struct.complex", %"struct.complex"* %A, i64 %1, i32 1
-  %7 = load float, float* %6, align 4
-  %8 = getelementptr inbounds %"struct.complex", %"struct.complex"* %B, i64 %1, i32 0
-  %9 = load float, float* %8, align 4
-  %10 = getelementptr inbounds %"struct.complex", %"struct.complex"* %B, i64 %1, i32 1
-  %11 = load float, float* %10, align 4
+  %4 = getelementptr inbounds %"struct.complex", ptr %A, i64 %1, i32 0
+  %5 = load float, ptr %4, align 4
+  %6 = getelementptr inbounds %"struct.complex", ptr %A, i64 %1, i32 1
+  %7 = load float, ptr %6, align 4
+  %8 = getelementptr inbounds %"struct.complex", ptr %B, i64 %1, i32 0
+  %9 = load float, ptr %8, align 4
+  %10 = getelementptr inbounds %"struct.complex", ptr %B, i64 %1, i32 1
+  %11 = load float, ptr %10, align 4
   %12 = fmul float %5, %9
   %13 = fmul float %7, %11
   %14 = fsub float %12, %13
@@ -67,10 +64,9 @@ loop:
   br i1 %21, label %exit, label %loop
 
 exit:
-  %22 = getelementptr inbounds %"struct.complex", %"struct.complex"* %Result,  i32 0, i32 0
-  store float %18, float* %22, align 4
-  %23 = getelementptr inbounds %"struct.complex", %"struct.complex"* %Result,  i32 0, i32 1
-  store float %19, float* %23, align 4
+  store float %18, ptr %Result, align 4
+  %22 = getelementptr inbounds %"struct.complex", ptr %Result,  i32 0, i32 1
+  store float %19, ptr %22, align 4
   ret void
 }
 

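reorder_phi.ll also shows the struct flavor of the all-zero-GEP fold: indexing
to the first field of the first element returns the base address itself, so
the final store goes straight through %Result. Reduced from the hunk above:

  %22 = getelementptr inbounds %struct.complex, %struct.complex* %Result, i32 0, i32 0
  store float %18, float* %22, align 4
  ; folds to:
  store float %18, ptr %Result, align 4
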
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll
index 1b31f8a3a98d8..09b3d25fd6dc0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll
@@ -8,17 +8,15 @@
 ; into bb1, vectorizing all the way to the broadcast load at the top.
 ; The stores in bb1 are external to this tree, but they are vectorizable and are
 ; in reverse order.
-define void @rotate_with_external_users(double *%A, double *%ptr) {
+define void @rotate_with_external_users(ptr %A, ptr %ptr) {
 ; CHECK-LABEL: @rotate_with_external_users(
 ; CHECK-NEXT:  bb1:
-; CHECK-NEXT:    [[LD:%.*]] = load double, double* undef, align 8
+; CHECK-NEXT:    [[LD:%.*]] = load double, ptr undef, align 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x double> [[SHUFFLE]], <double 2.200000e+00, double 1.100000e+00>
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], <double 2.200000e+00, double 1.100000e+00>
-; CHECK-NEXT:    [[PTRA1:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[PTRA1]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP2]], <2 x double>* [[TMP3]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    br label [[BB2:%.*]]
 ; CHECK:       bb2:
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP2]], <double 4.400000e+00, double 3.300000e+00>
@@ -28,7 +26,7 @@ define void @rotate_with_external_users(double *%A, double *%ptr) {
 ; CHECK-NEXT:    ret void
 ;
 bb1:
-  %ld = load double, double* undef
+  %ld = load double, ptr undef
 
   %add1 = fadd double %ld, 1.1
   %add2 = fadd double %ld, 2.2
@@ -37,10 +35,9 @@ bb1:
   %mul2 = fmul double %add2, 2.2
 
   ; These are external vectorizable stores with operands in reverse order.
-  %ptrA1 = getelementptr inbounds double, double* %A, i64 0
-  %ptrA2 = getelementptr inbounds double, double* %A, i64 1
-  store double %mul2, double *%ptrA1
-  store double %mul1, double *%ptrA2
+  %ptrA2 = getelementptr inbounds double, ptr %A, i64 1
+  store double %mul2, ptr %A
+  store double %mul1, ptr %ptrA2
   br label %bb2
 
 bb2:
@@ -51,32 +48,31 @@ bb2:
 }
 
 ; This checks that non-consecutive external users are skipped.
-define void @non_consecutive_external_users(double *%A, double *%ptr) {
+define void @non_consecutive_external_users(ptr %A, ptr %ptr) {
 ; CHECK-LABEL: @non_consecutive_external_users(
 ; CHECK-NEXT:  bb1:
-; CHECK-NEXT:    [[LD:%.*]] = load double, double* undef, align 8
+; CHECK-NEXT:    [[LD:%.*]] = load double, ptr undef, align 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x double> poison, double [[LD]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x double> [[SHUFFLE]], <double 1.100000e+00, double 2.200000e+00, double 3.300000e+00, double 4.400000e+00>
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd <4 x double> [[TMP1]], <double 1.100000e+00, double 2.200000e+00, double 3.300000e+00, double 4.400000e+00>
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <4 x double> [[TMP2]], <double 1.100000e+00, double 2.200000e+00, double 3.300000e+00, double 4.400000e+00>
-; CHECK-NEXT:    [[PTRA1:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
-; CHECK-NEXT:    [[PTRA4:%.*]] = getelementptr inbounds double, double* [[A]], i64 3
+; CHECK-NEXT:    [[PTRA4:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 3
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x double> [[TMP3]], i32 3
-; CHECK-NEXT:    store double [[TMP4]], double* [[PTRA1]], align 8
+; CHECK-NEXT:    store double [[TMP4]], ptr [[A]], align 8
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x double> [[TMP3]], i32 2
-; CHECK-NEXT:    store double [[TMP5]], double* [[PTRA1]], align 8
+; CHECK-NEXT:    store double [[TMP5]], ptr [[A]], align 8
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x double> [[TMP3]], i32 1
-; CHECK-NEXT:    store double [[TMP6]], double* [[PTRA4]], align 8
+; CHECK-NEXT:    store double [[TMP6]], ptr [[PTRA4]], align 8
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x double> [[TMP3]], i32 0
-; CHECK-NEXT:    store double [[TMP7]], double* [[PTRA4]], align 8
+; CHECK-NEXT:    store double [[TMP7]], ptr [[PTRA4]], align 8
 ; CHECK-NEXT:    br label [[SEED_LOOP:%.*]]
 ; CHECK:       seed_loop:
 ; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x double> [ [[TMP3]], [[BB1:%.*]] ], [ zeroinitializer, [[SEED_LOOP]] ]
 ; CHECK-NEXT:    br label [[SEED_LOOP]]
 ;
 bb1:
-  %ld = load double, double* undef
+  %ld = load double, ptr undef
 
   %add5 = fadd double %ld, 1.1
   %add6 = fadd double %ld, 2.2
@@ -94,12 +90,11 @@ bb1:
   %mul4 = fmul double %add4, 4.4
 
   ; External non-consecutive stores.
-  %ptrA1 = getelementptr inbounds double, double* %A, i64 0
-  %ptrA4 = getelementptr inbounds double, double* %A, i64 3
-  store double %mul4, double *%ptrA1
-  store double %mul3, double *%ptrA1
-  store double %mul2, double *%ptrA4
-  store double %mul1, double *%ptrA4
+  %ptrA4 = getelementptr inbounds double, ptr %A, i64 3
+  store double %mul4, ptr %A
+  store double %mul3, ptr %A
+  store double %mul2, ptr %ptrA4
+  store double %mul1, ptr %ptrA4
   br label %seed_loop
 
 seed_loop:
@@ -112,10 +107,10 @@ seed_loop:
 
 ; We have to be careful when the tree contains add/sub patterns that could be
 ; combined into a single addsub instruction. Reordering can block the pattern.
-define void @addsub_and_external_users(double *%A, double *%ptr) {
+define void @addsub_and_external_users(ptr %A, ptr %ptr) {
 ; CHECK-LABEL: @addsub_and_external_users(
 ; CHECK-NEXT:  bb1:
-; CHECK-NEXT:    [[LD:%.*]] = load double, double* undef, align 8
+; CHECK-NEXT:    [[LD:%.*]] = load double, ptr undef, align 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub <2 x double> [[SHUFFLE]], <double 1.100000e+00, double 1.200000e+00>
@@ -123,10 +118,8 @@ define void @addsub_and_external_users(double *%A, double *%ptr) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], <double 2.100000e+00, double 2.200000e+00>
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], <double 3.100000e+00, double 3.200000e+00>
-; CHECK-NEXT:    [[PTRA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
 ; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[PTRA0]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[SHUFFLE1]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    store <2 x double> [[SHUFFLE1]], ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    br label [[BB2:%.*]]
 ; CHECK:       bb2:
 ; CHECK-NEXT:    [[TMP7:%.*]] = fadd <2 x double> [[TMP5]], <double 4.100000e+00, double 4.200000e+00>
@@ -136,7 +129,7 @@ define void @addsub_and_external_users(double *%A, double *%ptr) {
 ; CHECK-NEXT:    ret void
 ;
 bb1:
-  %ld = load double, double* undef
+  %ld = load double, ptr undef
 
   %sub1 = fsub double %ld, 1.1
   %add2 = fadd double %ld, 1.2
@@ -148,10 +141,9 @@ bb1:
   %mul2 = fmul double %div2, 3.2
 
   ; These are external vectorizable stores with operands in reverse order.
-  %ptrA1 = getelementptr inbounds double, double* %A, i64 1
-  %ptrA0 = getelementptr inbounds double, double* %A, i64 0
-  store double %mul2, double *%ptrA0
-  store double %mul1, double *%ptrA1
+  %ptrA1 = getelementptr inbounds double, ptr %A, i64 1
+  store double %mul2, ptr %A
+  store double %mul1, ptr %ptrA1
   br label %bb2
 
 bb2:
@@ -162,10 +154,10 @@ bb2:
 }
 
; This contains a sub/add bundle; reordering it will make it better.
-define void @subadd_and_external_users(double *%A, double *%ptr) {
+define void @subadd_and_external_users(ptr %A, ptr %ptr) {
 ; CHECK-LABEL: @subadd_and_external_users(
 ; CHECK-NEXT:  bb1:
-; CHECK-NEXT:    [[LD:%.*]] = load double, double* undef, align 8
+; CHECK-NEXT:    [[LD:%.*]] = load double, ptr undef, align 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x double> [[SHUFFLE]], <double 1.200000e+00, double 1.100000e+00>
@@ -173,9 +165,7 @@ define void @subadd_and_external_users(double *%A, double *%ptr) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], <double 2.200000e+00, double 2.100000e+00>
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], <double 3.200000e+00, double 3.100000e+00>
-; CHECK-NEXT:    [[PTRA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[PTRA0]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    br label [[BB2:%.*]]
 ; CHECK:       bb2:
 ; CHECK-NEXT:    [[TMP7:%.*]] = fadd <2 x double> [[TMP5]], <double 4.200000e+00, double 4.100000e+00>
@@ -185,7 +175,7 @@ define void @subadd_and_external_users(double *%A, double *%ptr) {
 ; CHECK-NEXT:    ret void
 ;
 bb1:
-  %ld = load double, double* undef
+  %ld = load double, ptr undef
 
   %add1 = fadd double %ld, 1.1
   %sub2 = fsub double %ld, 1.2
@@ -197,10 +187,9 @@ bb1:
   %mul2 = fmul double %div2, 3.2
 
   ; These are external vectorizable stores with operands in reverse order.
-  %ptrA1 = getelementptr inbounds double, double* %A, i64 1
-  %ptrA0 = getelementptr inbounds double, double* %A, i64 0
-  store double %mul2, double *%ptrA0
-  store double %mul1, double *%ptrA1
+  %ptrA1 = getelementptr inbounds double, ptr %A, i64 1
+  store double %mul2, ptr %A
+  store double %mul1, ptr %ptrA1
   br label %bb2
 
 bb2:
@@ -210,10 +199,10 @@ bb2:
   ret void
 }
 
-define void @alt_but_not_addsub_and_external_users(double *%A, double *%ptr) {
+define void @alt_but_not_addsub_and_external_users(ptr %A, ptr %ptr) {
 ; CHECK-LABEL: @alt_but_not_addsub_and_external_users(
 ; CHECK-NEXT:  bb1:
-; CHECK-NEXT:    [[LD:%.*]] = load double, double* undef, align 8
+; CHECK-NEXT:    [[LD:%.*]] = load double, ptr undef, align 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x double> poison, double [[LD]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub <4 x double> [[SHUFFLE]], <double 1.400000e+00, double 1.300000e+00, double 1.200000e+00, double 1.100000e+00>
@@ -221,16 +210,14 @@ define void @alt_but_not_addsub_and_external_users(double *%A, double *%ptr) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
 ; CHECK-NEXT:    [[TMP4:%.*]] = fdiv <4 x double> [[TMP3]], <double 2.400000e+00, double 2.300000e+00, double 2.200000e+00, double 2.100000e+00>
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <4 x double> [[TMP4]], <double 3.400000e+00, double 3.300000e+00, double 3.200000e+00, double 3.100000e+00>
-; CHECK-NEXT:    [[PTRA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[PTRA0]] to <4 x double>*
-; CHECK-NEXT:    store <4 x double> [[TMP5]], <4 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    store <4 x double> [[TMP5]], ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    br label [[BB2:%.*]]
 ; CHECK:       bb2:
 ; CHECK-NEXT:    [[TMP7:%.*]] = phi <4 x double> [ [[TMP5]], [[BB1:%.*]] ], [ <double 4.400000e+00, double 4.300000e+00, double 4.200000e+00, double 4.100000e+00>, [[BB2]] ]
 ; CHECK-NEXT:    br label [[BB2]]
 ;
 bb1:
-  %ld = load double, double* undef
+  %ld = load double, ptr undef
 
   %sub1 = fsub double %ld, 1.1
   %add2 = fadd double %ld, 1.2
@@ -248,14 +235,13 @@ bb1:
   %mul4 = fmul double %div4, 3.4
 
   ; These are external vectorizable stores with operands in reverse order.
-  %ptrA3 = getelementptr inbounds double, double* %A, i64 3
-  %ptrA2 = getelementptr inbounds double, double* %A, i64 2
-  %ptrA1 = getelementptr inbounds double, double* %A, i64 1
-  %ptrA0 = getelementptr inbounds double, double* %A, i64 0
-  store double %mul4, double *%ptrA0
-  store double %mul3, double *%ptrA1
-  store double %mul2, double *%ptrA2
-  store double %mul1, double *%ptrA3
+  %ptrA3 = getelementptr inbounds double, ptr %A, i64 3
+  %ptrA2 = getelementptr inbounds double, ptr %A, i64 2
+  %ptrA1 = getelementptr inbounds double, ptr %A, i64 1
+  store double %mul4, ptr %A
+  store double %mul3, ptr %ptrA1
+  store double %mul2, ptr %ptrA2
+  store double %mul1, ptr %ptrA3
   br label %bb2
 
 bb2:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_reordered_users.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_reordered_users.ll
index 2cf6a7c7e9d99..eb5d13d6fc19d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_reordered_users.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_reordered_users.ll
@@ -42,28 +42,22 @@
 ; comment out reorderTopToBottom() and remove the stores.
 
 
-define void @reorder_crash(float* %ptr) {
+define void @reorder_crash(ptr %ptr) {
 ; CHECK-LABEL: @reorder_crash(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds float, float* [[PTR:%.*]], i64 0
 ; CHECK-NEXT:    br i1 undef, label [[BB0:%.*]], label [[BB12:%.*]]
 ; CHECK:       bb0:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[PTR:%.*]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    br label [[BB3:%.*]]
 ; CHECK:       bb12:
 ; CHECK-NEXT:    br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, ptr [[PTR]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP4]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb2:
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
-; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x float>, ptr [[PTR]], align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], zeroinitializer
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 0, i32 1>
 ; CHECK-NEXT:    br label [[BB3]]
@@ -72,24 +66,23 @@ define void @reorder_crash(float* %ptr) {
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %gep0 = getelementptr inbounds float, float* %ptr, i64 0
-  %gep1 = getelementptr inbounds float, float* %ptr, i64 1
-  %gep2 = getelementptr inbounds float, float* %ptr, i64 2
-  %gep3 = getelementptr inbounds float, float* %ptr, i64 3
+  %gep1 = getelementptr inbounds float, ptr %ptr, i64 1
+  %gep2 = getelementptr inbounds float, ptr %ptr, i64 2
+  %gep3 = getelementptr inbounds float, ptr %ptr, i64 3
   br i1 undef, label %bb0, label %bb12
 
 bb0:
   ; Used by phi in this order: 1, 0, 2, 3
-  %ld00 = load float, float* %gep0
-  %ld01 = load float, float* %gep1
-  %ld02 = load float, float* %gep2
-  %ld03 = load float, float* %gep3
+  %ld00 = load float, ptr %ptr
+  %ld01 = load float, ptr %gep1
+  %ld02 = load float, ptr %gep2
+  %ld03 = load float, ptr %gep3
 
   ; External store users in natural order 0, 1, 2, 3
-  store float %ld00, float *%gep0
-  store float %ld01, float *%gep1
-  store float %ld02, float *%gep2
-  store float %ld03, float *%gep3
+  store float %ld00, ptr %ptr
+  store float %ld01, ptr %gep1
+  store float %ld02, ptr %gep2
+  store float %ld03, ptr %gep3
   br label %bb3
 
 bb12:
@@ -97,25 +90,25 @@ bb12:
 
 bb1:
   ; Used by phi in this order: 1, 0, 2, 3
-  %ld10 = load float, float* %gep0
-  %ld11 = load float, float* %gep1
-  %ld12 = load float, float* %gep2
-  %ld13 = load float, float* %gep3
+  %ld10 = load float, ptr %ptr
+  %ld11 = load float, ptr %gep1
+  %ld12 = load float, ptr %gep2
+  %ld13 = load float, ptr %gep3
 
   ; External store users in natural order 0, 1, 2, 3
-  store float %ld10, float *%gep0
-  store float %ld11, float *%gep1
-  store float %ld12, float *%gep2
-  store float %ld13, float *%gep3
+  store float %ld10, ptr %ptr
+  store float %ld11, ptr %gep1
+  store float %ld12, ptr %gep2
+  store float %ld13, ptr %gep3
 
   br label %bb3
 
 bb2:
   ; Used by fadd in this order: 2, 3, 0, 1
-  %ld20 = load float, float* %gep0
-  %ld21 = load float, float* %gep1
-  %ld22 = load float, float* %gep2
-  %ld23 = load float, float* %gep3
+  %ld20 = load float, ptr %ptr
+  %ld21 = load float, ptr %gep1
+  %ld22 = load float, ptr %gep2
+  %ld23 = load float, ptr %gep3
 
   ; Used by phi in this order: 0, 1, 2, 3
   %add20 = fadd float %ld22, 0.0

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll b/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll
index b01bddebf9220..4517d27598b60 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll
@@ -1,12 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown %s -slp-threshold=-5 | FileCheck %s
 
-define i32 @test(i32* %isec) {
+define i32 @test(ptr %isec) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[ISEC:%.*]], i32 0
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX10]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[ISEC:%.*]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
 ; CHECK-NEXT:    br i1 false, label [[BLOCK1:%.*]], label [[BLOCK3:%.*]]
 ; CHECK:       block1:
@@ -21,19 +19,17 @@ define i32 @test(i32* %isec) {
 ; CHECK-NEXT:    ret i32 [[TMP6]]
 ;
 entry:
-  %arrayidx10 = getelementptr inbounds i32, i32* %isec, i32 0
-  %0 = bitcast i32* %arrayidx10 to <2 x i32>*
-  %1 = load <2 x i32>, <2 x i32>* %0, align 8
-  %2 = extractelement <2 x i32> %1, i32 1
-  %3 = extractelement <2 x i32> %1, i32 0
+  %0 = load <2 x i32>, ptr %isec, align 8
+  %1 = extractelement <2 x i32> %0, i32 1
+  %2 = extractelement <2 x i32> %0, i32 0
   br i1 false, label %block1, label %block3
 block1:
   br i1 false, label %block2, label %block3
 block2:
   br label %block3
 block3:
-  %4 = phi i32 [ %2, %block1 ], [ %2, %block2 ], [ %3, %entry ]
-  %5 = phi i32 [ %3, %block1 ], [ %3, %block2 ], [ %2, %entry ]
-  %6 = mul i32 %4, %5
-  ret i32 %6
+  %3 = phi i32 [ %1, %block1 ], [ %1, %block2 ], [ %2, %entry ]
+  %4 = phi i32 [ %2, %block1 ], [ %2, %block2 ], [ %1, %entry ]
+  %5 = mul i32 %3, %4
+  ret i32 %5
 }

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll
index 8684ad42585dd..3968f58877bc0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll
@@ -9,7 +9,6 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv()
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 undef, label [[IF_END50_I:%.*]], label [[IF_THEN22_I:%.*]]
 ; CHECK:       if.then22.i:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 0
 ; CHECK-NEXT:    [[SUB_I:%.*]] = add nsw i32 undef, -1
 ; CHECK-NEXT:    [[CONV31_I:%.*]] = and i32 undef, [[SUB_I]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[CONV31_I]], i32 0
@@ -31,8 +30,7 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv()
 ; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> [[TMP11]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 ; CHECK-NEXT:    [[TMP13:%.*]] = trunc <16 x i32> [[TMP12]] to <16 x i8>
 ; CHECK-NEXT:    [[TMP14:%.*]] = and <16 x i8> [[TMP13]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[TMP15:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
-; CHECK-NEXT:    store <16 x i8> [[TMP14]], <16 x i8>* [[TMP15]], align 1
+; CHECK-NEXT:    store <16 x i8> [[TMP14]], ptr undef, align 1
 ; CHECK-NEXT:    unreachable
 ; CHECK:       if.end50.i:
 ; CHECK-NEXT:    ret void
@@ -43,85 +41,84 @@ entry:
 if.then22.i:                                      ; preds = %entry
   %sub.i = add nsw i32 undef, -1
   %conv31.i = and i32 undef, %sub.i
-  %0 = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 0
-  %1 = trunc i32 %sub.i to i8
-  %conv.i.i1199 = and i8 %1, 1
-  store i8 %conv.i.i1199, i8* %0, align 1
+  %0 = trunc i32 %sub.i to i8
+  %conv.i.i1199 = and i8 %0, 1
+  store i8 %conv.i.i1199, ptr undef, align 1
   %shr.i.i = lshr i32 %conv31.i, 1
-  %2 = trunc i32 %shr.i.i to i8
-  %conv.1.i.i = and i8 %2, 1
-  %arrayidx.i.i7.1.i.i = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 1
-  store i8 %conv.1.i.i, i8* %arrayidx.i.i7.1.i.i, align 1
+  %1 = trunc i32 %shr.i.i to i8
+  %conv.1.i.i = and i8 %1, 1
+  %arrayidx.i.i7.1.i.i = getelementptr inbounds %"struct.std::array", ptr undef, i64 0, i32 0, i64 1
+  store i8 %conv.1.i.i, ptr %arrayidx.i.i7.1.i.i, align 1
   %shr.1.i.i = lshr i32 %conv31.i, 2
-  %3 = trunc i32 %shr.1.i.i to i8
-  %conv.2.i.i = and i8 %3, 1
-  %arrayidx.i.i7.2.i.i = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 2
-  store i8 %conv.2.i.i, i8* %arrayidx.i.i7.2.i.i, align 1
+  %2 = trunc i32 %shr.1.i.i to i8
+  %conv.2.i.i = and i8 %2, 1
+  %arrayidx.i.i7.2.i.i = getelementptr inbounds %"struct.std::array", ptr undef, i64 0, i32 0, i64 2
+  store i8 %conv.2.i.i, ptr %arrayidx.i.i7.2.i.i, align 1
   %shr.2.i.i = lshr i32 %conv31.i, 3
-  %4 = trunc i32 %shr.2.i.i to i8
-  %conv.3.i.i = and i8 %4, 1
-  %arrayidx.i.i7.3.i.i = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 3
-  store i8 %conv.3.i.i, i8* %arrayidx.i.i7.3.i.i, align 1
+  %3 = trunc i32 %shr.2.i.i to i8
+  %conv.3.i.i = and i8 %3, 1
+  %arrayidx.i.i7.3.i.i = getelementptr inbounds %"struct.std::array", ptr undef, i64 0, i32 0, i64 3
+  store i8 %conv.3.i.i, ptr %arrayidx.i.i7.3.i.i, align 1
   %shr.3.i.i = lshr i32 %conv31.i, 4
-  %5 = trunc i32 %shr.3.i.i to i8
-  %conv.4.i.i = and i8 %5, 1
-  %arrayidx.i.i7.4.i.i = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 4
-  store i8 %conv.4.i.i, i8* %arrayidx.i.i7.4.i.i, align 1
+  %4 = trunc i32 %shr.3.i.i to i8
+  %conv.4.i.i = and i8 %4, 1
+  %arrayidx.i.i7.4.i.i = getelementptr inbounds %"struct.std::array", ptr undef, i64 0, i32 0, i64 4
+  store i8 %conv.4.i.i, ptr %arrayidx.i.i7.4.i.i, align 1
   %shr.4.i.i = lshr i32 %conv31.i, 5
-  %6 = trunc i32 %shr.4.i.i to i8
-  %conv.5.i.i = and i8 %6, 1
-  %arrayidx.i.i7.5.i.i = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 5
-  store i8 %conv.5.i.i, i8* %arrayidx.i.i7.5.i.i, align 1
+  %5 = trunc i32 %shr.4.i.i to i8
+  %conv.5.i.i = and i8 %5, 1
+  %arrayidx.i.i7.5.i.i = getelementptr inbounds %"struct.std::array", ptr undef, i64 0, i32 0, i64 5
+  store i8 %conv.5.i.i, ptr %arrayidx.i.i7.5.i.i, align 1
   %shr.5.i.i = lshr i32 %conv31.i, 6
-  %7 = trunc i32 %shr.5.i.i to i8
-  %conv.6.i.i = and i8 %7, 1
-  %arrayidx.i.i7.6.i.i = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 6
-  store i8 %conv.6.i.i, i8* %arrayidx.i.i7.6.i.i, align 1
+  %6 = trunc i32 %shr.5.i.i to i8
+  %conv.6.i.i = and i8 %6, 1
+  %arrayidx.i.i7.6.i.i = getelementptr inbounds %"struct.std::array", ptr undef, i64 0, i32 0, i64 6
+  store i8 %conv.6.i.i, ptr %arrayidx.i.i7.6.i.i, align 1
   %shr.6.i.i = lshr i32 %conv31.i, 7
-  %8 = trunc i32 %shr.6.i.i to i8
-  %conv.7.i.i = and i8 %8, 1
-  %arrayidx.i.i7.7.i.i = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 7
-  store i8 %conv.7.i.i, i8* %arrayidx.i.i7.7.i.i, align 1
+  %7 = trunc i32 %shr.6.i.i to i8
+  %conv.7.i.i = and i8 %7, 1
+  %arrayidx.i.i7.7.i.i = getelementptr inbounds %"struct.std::array", ptr undef, i64 0, i32 0, i64 7
+  store i8 %conv.7.i.i, ptr %arrayidx.i.i7.7.i.i, align 1
   %shr.7.i.i = lshr i32 %conv31.i, 8
-  %9 = trunc i32 %shr.7.i.i to i8
-  %conv.8.i.i = and i8 %9, 1
-  %arrayidx.i.i7.8.i.i = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 8
-  store i8 %conv.8.i.i, i8* %arrayidx.i.i7.8.i.i, align 1
+  %8 = trunc i32 %shr.7.i.i to i8
+  %conv.8.i.i = and i8 %8, 1
+  %arrayidx.i.i7.8.i.i = getelementptr inbounds %"struct.std::array", ptr undef, i64 0, i32 0, i64 8
+  store i8 %conv.8.i.i, ptr %arrayidx.i.i7.8.i.i, align 1
   %shr.8.i.i = lshr i32 %conv31.i, 9
-  %10 = trunc i32 %shr.8.i.i to i8
-  %conv.9.i.i = and i8 %10, 1
-  %arrayidx.i.i7.9.i.i = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 9
-  store i8 %conv.9.i.i, i8* %arrayidx.i.i7.9.i.i, align 1
+  %9 = trunc i32 %shr.8.i.i to i8
+  %conv.9.i.i = and i8 %9, 1
+  %arrayidx.i.i7.9.i.i = getelementptr inbounds %"struct.std::array", ptr undef, i64 0, i32 0, i64 9
+  store i8 %conv.9.i.i, ptr %arrayidx.i.i7.9.i.i, align 1
   %shr.9.i.i = lshr i32 %conv31.i, 10
-  %11 = trunc i32 %shr.9.i.i to i8
-  %conv.10.i.i = and i8 %11, 1
-  %arrayidx.i.i7.10.i.i = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 10
-  store i8 %conv.10.i.i, i8* %arrayidx.i.i7.10.i.i, align 1
+  %10 = trunc i32 %shr.9.i.i to i8
+  %conv.10.i.i = and i8 %10, 1
+  %arrayidx.i.i7.10.i.i = getelementptr inbounds %"struct.std::array", ptr undef, i64 0, i32 0, i64 10
+  store i8 %conv.10.i.i, ptr %arrayidx.i.i7.10.i.i, align 1
   %shr.10.i.i = lshr i32 %conv31.i, 11
-  %12 = trunc i32 %shr.10.i.i to i8
-  %conv.11.i.i = and i8 %12, 1
-  %arrayidx.i.i7.11.i.i = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 11
-  store i8 %conv.11.i.i, i8* %arrayidx.i.i7.11.i.i, align 1
+  %11 = trunc i32 %shr.10.i.i to i8
+  %conv.11.i.i = and i8 %11, 1
+  %arrayidx.i.i7.11.i.i = getelementptr inbounds %"struct.std::array", ptr undef, i64 0, i32 0, i64 11
+  store i8 %conv.11.i.i, ptr %arrayidx.i.i7.11.i.i, align 1
   %shr.11.i.i = lshr i32 %conv31.i, 12
-  %13 = trunc i32 %shr.11.i.i to i8
-  %conv.12.i.i = and i8 %13, 1
-  %arrayidx.i.i7.12.i.i = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 12
-  store i8 %conv.12.i.i, i8* %arrayidx.i.i7.12.i.i, align 1
+  %12 = trunc i32 %shr.11.i.i to i8
+  %conv.12.i.i = and i8 %12, 1
+  %arrayidx.i.i7.12.i.i = getelementptr inbounds %"struct.std::array", ptr undef, i64 0, i32 0, i64 12
+  store i8 %conv.12.i.i, ptr %arrayidx.i.i7.12.i.i, align 1
   %shr.12.i.i = lshr i32 %conv31.i, 13
-  %14 = trunc i32 %shr.12.i.i to i8
-  %conv.13.i.i = and i8 %14, 1
-  %arrayidx.i.i7.13.i.i = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 13
-  store i8 %conv.13.i.i, i8* %arrayidx.i.i7.13.i.i, align 1
+  %13 = trunc i32 %shr.12.i.i to i8
+  %conv.13.i.i = and i8 %13, 1
+  %arrayidx.i.i7.13.i.i = getelementptr inbounds %"struct.std::array", ptr undef, i64 0, i32 0, i64 13
+  store i8 %conv.13.i.i, ptr %arrayidx.i.i7.13.i.i, align 1
   %shr.13.i.i = lshr i32 %conv31.i, 14
-  %15 = trunc i32 %shr.13.i.i to i8
-  %conv.14.i.i = and i8 %15, 1
-  %arrayidx.i.i7.14.i.i = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 14
-  store i8 %conv.14.i.i, i8* %arrayidx.i.i7.14.i.i, align 1
+  %14 = trunc i32 %shr.13.i.i to i8
+  %conv.14.i.i = and i8 %14, 1
+  %arrayidx.i.i7.14.i.i = getelementptr inbounds %"struct.std::array", ptr undef, i64 0, i32 0, i64 14
+  store i8 %conv.14.i.i, ptr %arrayidx.i.i7.14.i.i, align 1
   %shr.14.i.i = lshr i32 %conv31.i, 15
-  %16 = trunc i32 %shr.14.i.i to i8
-  %conv.15.i.i = and i8 %16, 1
-  %arrayidx.i.i7.15.i.i = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 15
-  store i8 %conv.15.i.i, i8* %arrayidx.i.i7.15.i.i, align 1
+  %15 = trunc i32 %shr.14.i.i to i8
+  %conv.15.i.i = and i8 %15, 1
+  %arrayidx.i.i7.15.i.i = getelementptr inbounds %"struct.std::array", ptr undef, i64 0, i32 0, i64 15
+  store i8 %conv.15.i.i, ptr %arrayidx.i.i7.15.i.i, align 1
   unreachable
 
 if.end50.i:                                       ; preds = %entry

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/return.ll b/llvm/test/Transforms/SLPVectorizer/X86/return.ll
index a15834cf7ac26..31b328ca2f513 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/return.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/return.ll
@@ -16,8 +16,8 @@ target triple = "x86_64--linux-gnu"
 define double @return1() {
 ; CHECK-LABEL: @return1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x double>, <2 x double>* bitcast ([4 x double]* @a to <2 x double>*), align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([4 x double]* @b to <2 x double>*), align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x double>, ptr @a, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @b, align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x double> [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP2]], i32 1
@@ -25,28 +25,26 @@ define double @return1() {
 ; CHECK-NEXT:    ret double [[ADD2]]
 ;
 entry:
-  %a0 = load double, double* getelementptr inbounds ([4 x double], [4 x double]* @a, i32 0, i32 0), align 8
-  %b0 = load double, double* getelementptr inbounds ([4 x double], [4 x double]* @b, i32 0, i32 0), align 8
+  %a0 = load double, ptr @a, align 8
+  %b0 = load double, ptr @b, align 8
   %add0 = fadd double %a0, %b0
-  %a1 = load double, double* getelementptr inbounds ([4 x double], [4 x double]* @a, i32 0, i32 1), align 8
-  %b1 = load double, double* getelementptr inbounds ([4 x double], [4 x double]* @b, i32 0, i32 1), align 8
+  %a1 = load double, ptr getelementptr inbounds ([4 x double], ptr @a, i32 0, i32 1), align 8
+  %b1 = load double, ptr getelementptr inbounds ([4 x double], ptr @b, i32 0, i32 1), align 8
   %add1 = fadd double %a1, %b1
   %add2 = fadd double %add0, %add1
   ret double %add2
 }
 
-; double hadd(double *x) {
+; double hadd(ptr x) {
 ;   return ((x[0] + x[2]) + (x[1] + x[3]));
 ; }
 
-define double @return2(double* nocapture readonly %x) {
+define double @return2(ptr nocapture readonly %x) {
 ; CHECK-LABEL: @return2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i32 2
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[X]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[ARRAYIDX1]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[X:%.*]], i32 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[X]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX1]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
@@ -54,14 +52,14 @@ define double @return2(double* nocapture readonly %x) {
 ; CHECK-NEXT:    ret double [[ADD5]]
 ;
 entry:
-  %x0 = load double, double* %x, align 4
-  %arrayidx1 = getelementptr inbounds double, double* %x, i32 2
-  %x2 = load double, double* %arrayidx1, align 4
+  %x0 = load double, ptr %x, align 4
+  %arrayidx1 = getelementptr inbounds double, ptr %x, i32 2
+  %x2 = load double, ptr %arrayidx1, align 4
   %add3 = fadd double %x0, %x2
-  %arrayidx2 = getelementptr inbounds double, double* %x, i32 1
-  %x1 = load double, double* %arrayidx2, align 4
-  %arrayidx3 = getelementptr inbounds double, double* %x, i32 3
-  %x3 = load double, double* %arrayidx3, align 4
+  %arrayidx2 = getelementptr inbounds double, ptr %x, i32 1
+  %x1 = load double, ptr %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds double, ptr %x, i32 3
+  %x3 = load double, ptr %arrayidx3, align 4
   %add4 = fadd double %x1, %x3
   %add5 = fadd double %add3, %add4
   ret double %add5

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll b/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll
index 446c3bfab6d6c..6bbc33bac80f9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll
@@ -4,35 +4,32 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
 
 %struct.S = type { [3 x float], [3 x float], [4 x float] }
 
-define i32 @foo(i32 %0, i32* %1, float* %2)  {
+define i32 @foo(i32 %0, ptr %1, ptr %2)  {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:    [[T4:%.*]] = alloca [[STRUCT_S:%.*]], align 8
-; CHECK-NEXT:    [[T8:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[T4]], i64 0, i32 1
-; CHECK-NEXT:    [[T9:%.*]] = getelementptr inbounds [3 x float], [3 x float]* [[T8]], i64 1, i64 0
-; CHECK-NEXT:    [[T14:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[T4]], i64 0, i32 1, i64 0
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[T14]] to <2 x float>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[TMP4]], align 4
+; CHECK-NEXT:    [[T8:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[T4]], i64 0, i32 1
+; CHECK-NEXT:    [[T9:%.*]] = getelementptr inbounds [3 x float], ptr [[T8]], i64 1, i64 0
+; CHECK-NEXT:    [[T14:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[T4]], i64 0, i32 1, i64 0
+; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x float>, ptr [[T14]], align 4
 ; CHECK-NEXT:    br label [[T37:%.*]]
 ; CHECK:       t37:
 ; CHECK-NEXT:    [[TMP6:%.*]] = phi <2 x float> [ [[TMP5]], [[TMP3:%.*]] ], [ [[T89:%.*]], [[T37]] ]
 ; CHECK-NEXT:    [[TMP7:%.*]] = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[TMP6]]
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[T21:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[T4]], i64 0, i32 2, i64 0
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast float* [[T21]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[SHUFFLE]], <4 x float>* [[TMP8]], align 4
-; CHECK-NEXT:    [[T88:%.*]] = bitcast float* [[T9]] to <2 x float>*
-; CHECK-NEXT:    [[T89]] = load <2 x float>, <2 x float>* [[T88]], align 4
+; CHECK-NEXT:    [[T21:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[T4]], i64 0, i32 2, i64 0
+; CHECK-NEXT:    store <4 x float> [[SHUFFLE]], ptr [[T21]], align 4
+; CHECK-NEXT:    [[T89]] = load <2 x float>, ptr [[T9]], align 4
 ; CHECK-NEXT:    br i1 undef, label [[T37]], label [[T55:%.*]]
 ; CHECK:       t55:
 ; CHECK-NEXT:    ret i32 0
 ;
   %t4 = alloca %struct.S, align 8
-  %t8 = getelementptr inbounds %struct.S, %struct.S* %t4, i64 0, i32 1
-  %t9 = getelementptr inbounds [3 x float], [3 x float]* %t8, i64 1, i64 0
-  %t14 = getelementptr inbounds %struct.S, %struct.S* %t4, i64 0, i32 1, i64 0
-  %t11 = getelementptr inbounds %struct.S, %struct.S* %t4, i64 0, i32 1, i64 1
-  %t15 = load float, float* %t14, align 4
-  %t16 = load float, float* %t11, align 4
+  %t8 = getelementptr inbounds %struct.S, ptr %t4, i64 0, i32 1
+  %t9 = getelementptr inbounds [3 x float], ptr %t8, i64 1, i64 0
+  %t14 = getelementptr inbounds %struct.S, ptr %t4, i64 0, i32 1, i64 0
+  %t11 = getelementptr inbounds %struct.S, ptr %t4, i64 0, i32 1, i64 1
+  %t15 = load float, ptr %t14, align 4
+  %t16 = load float, ptr %t11, align 4
   br label %t37
 
 t37:
@@ -41,16 +38,15 @@ t37:
   %t19 = phi float [ %t15, %3 ], [ %x23, %t37 ]
   %t20 = fdiv fast float 1.000000e+00, %t19
   %t24 = fdiv fast float 1.000000e+00, %t18
-  %t21 = getelementptr inbounds %struct.S, %struct.S* %t4, i64 0, i32 2, i64 0
-  %t25 = getelementptr inbounds %struct.S, %struct.S* %t4, i64 0, i32 2, i64 1
-  %t31 = getelementptr inbounds %struct.S, %struct.S* %t4, i64 0, i32 2, i64 2
-  %t33 = getelementptr inbounds %struct.S, %struct.S* %t4, i64 0, i32 2, i64 3
-  store float %t20, float* %t21, align 4
-  store float %t24, float* %t25, align 4
-  store float %t24, float* %t31, align 4
-  store float %t24, float* %t33, align 4
-  %t88 = bitcast float* %t9 to <2 x float>*
-  %t89 = load <2 x float>, <2 x float>* %t88, align 4
+  %t21 = getelementptr inbounds %struct.S, ptr %t4, i64 0, i32 2, i64 0
+  %t25 = getelementptr inbounds %struct.S, ptr %t4, i64 0, i32 2, i64 1
+  %t31 = getelementptr inbounds %struct.S, ptr %t4, i64 0, i32 2, i64 2
+  %t33 = getelementptr inbounds %struct.S, ptr %t4, i64 0, i32 2, i64 3
+  store float %t20, ptr %t21, align 4
+  store float %t24, ptr %t25, align 4
+  store float %t24, ptr %t31, align 4
+  store float %t24, ptr %t33, align 4
+  %t89 = load <2 x float>, ptr %t9, align 4
   %x23 = extractelement <2 x float> %t89, i32 0
   %x24 = extractelement <2 x float> %t89, i32 1
   br i1 undef, label %t37, label %t55

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll b/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll
index 38840e5f9b49a..f719cb545a75b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll
@@ -17,15 +17,13 @@ define void @test() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 undef, label [[IF_END:%.*]], label [[FOR_COND_PREHEADER:%.*]]
 ; CHECK:       for.cond.preheader:
-; CHECK-NEXT:    [[I:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* undef, i64 0, i64 2
-; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* undef, i64 0, i64 3
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[I]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 8
+; CHECK-NEXT:    [[I:%.*]] = getelementptr inbounds [100 x i32], ptr undef, i64 0, i64 2
+; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds [100 x i32], ptr undef, i64 0, i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[I]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
 ; CHECK-NEXT:    [[OP_RDX7:%.*]] = add i32 [[TMP2]], undef
 ; CHECK-NEXT:    [[OP_RDX8:%.*]] = add i32 [[OP_RDX7]], undef
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[I1]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[I1]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
 ; CHECK-NEXT:    [[OP_RDX5:%.*]] = add i32 [[TMP5]], undef
 ; CHECK-NEXT:    [[OP_RDX6:%.*]] = add i32 [[OP_RDX5]], undef
@@ -44,15 +42,15 @@ entry:
   br i1 undef, label %if.end, label %for.cond.preheader
 
 for.cond.preheader:                               ; preds = %entry
-  %i = getelementptr inbounds [100 x i32], [100 x i32]* undef, i64 0, i64 2
-  %i1 = getelementptr inbounds [100 x i32], [100 x i32]* undef, i64 0, i64 3
-  %i2 = getelementptr inbounds [100 x i32], [100 x i32]* undef, i64 0, i64 4
-  %i3 = getelementptr inbounds [100 x i32], [100 x i32]* undef, i64 0, i64 5
-  %i4 = getelementptr inbounds [100 x i32], [100 x i32]* undef, i64 0, i64 6
-  %ld0 = load i32, i32* %i, align 8
-  %ld1 = load i32, i32* %i1, align 4
-  %ld2 = load i32, i32* %i2, align 16
-  %ld3 = load i32, i32* %i3, align 4
+  %i = getelementptr inbounds [100 x i32], ptr undef, i64 0, i64 2
+  %i1 = getelementptr inbounds [100 x i32], ptr undef, i64 0, i64 3
+  %i2 = getelementptr inbounds [100 x i32], ptr undef, i64 0, i64 4
+  %i3 = getelementptr inbounds [100 x i32], ptr undef, i64 0, i64 5
+  %i4 = getelementptr inbounds [100 x i32], ptr undef, i64 0, i64 6
+  %ld0 = load i32, ptr %i, align 8
+  %ld1 = load i32, ptr %i1, align 4
+  %ld2 = load i32, ptr %i2, align 16
+  %ld3 = load i32, ptr %i3, align 4
   %i5 = add i32 undef, undef
   %i6 = add i32 %i5, %ld3
   %i7 = add i32 %i6, %ld2
@@ -60,10 +58,10 @@ for.cond.preheader:                               ; preds = %entry
   %i9 = add i32 %i8, %ld0
   %i10 = add i32 %i9, undef
   %i11 = add i32 %i9, %i10
-  %ld4 = load i32, i32* %i1, align 4
-  %ld5 = load i32, i32* %i2, align 16
-  %ld6 = load i32, i32* %i3, align 4
-  %ld7 = load i32, i32* %i4, align 8
+  %ld4 = load i32, ptr %i1, align 4
+  %ld5 = load i32, ptr %i2, align 16
+  %ld6 = load i32, ptr %i3, align 4
+  %ld7 = load i32, ptr %i4, align 8
   %i12 = add i32 undef, undef
   %i13 = add i32 %i12, %ld7
   %i14 = add i32 %i13, %ld6

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/reverse_extract_elements.ll b/llvm/test/Transforms/SLPVectorizer/X86/reverse_extract_elements.ll
index 5e5a4e90b4957..9f82ca4ae750a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reverse_extract_elements.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reverse_extract_elements.ll
@@ -27,18 +27,18 @@ entry:
   ret float %add.3
 }
 
-define double @dotd(<4 x double>* byval(<4 x double>) nocapture readonly align 32, <4 x double>* byval(<4 x double>) nocapture readonly align 32) {
+define double @dotd(ptr byval(<4 x double>) nocapture readonly align 32, ptr byval(<4 x double>) nocapture readonly align 32) {
 ; CHECK-LABEL: @dotd(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[X:%.*]] = load <4 x double>, <4 x double>* [[TMP0:%.*]], align 32
-; CHECK-NEXT:    [[Y:%.*]] = load <4 x double>, <4 x double>* [[TMP1:%.*]], align 32
+; CHECK-NEXT:    [[X:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32
+; CHECK-NEXT:    [[Y:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <4 x double> [[X]], [[Y]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[TMP2]])
 ; CHECK-NEXT:    ret double [[TMP3]]
 ;
 entry:
-  %x = load <4 x double>, <4 x double>* %0, align 32
-  %y = load <4 x double>, <4 x double>* %1, align 32
+  %x = load <4 x double>, ptr %0, align 32
+  %y = load <4 x double>, ptr %1, align 32
   %vecext = extractelement <4 x double> %x, i32 0
   %vecext1 = extractelement <4 x double> %y, i32 0
   %mul = fmul fast double %vecext, %vecext1
@@ -57,18 +57,18 @@ entry:
   ret double %add.3
 }
 
-define float @dotfq(<4 x float>* nocapture readonly %x, <4 x float>* nocapture readonly %y) {
+define float @dotfq(ptr nocapture readonly %x, ptr nocapture readonly %y) {
 ; CHECK-LABEL: @dotfq(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[X:%.*]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[Y:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[X:%.*]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Y:%.*]], align 16
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
 ; CHECK-NEXT:    ret float [[TMP3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %x, align 16
-  %1 = load <4 x float>, <4 x float>* %y, align 16
+  %0 = load <4 x float>, ptr %x, align 16
+  %1 = load <4 x float>, ptr %y, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %vecext1 = extractelement <4 x float> %1, i32 0
   %mul = fmul fast float %vecext1, %vecext
@@ -87,18 +87,18 @@ entry:
   ret float %add.3
 }
 
-define double @dotdq(<4 x double>* nocapture readonly %x, <4 x double>* nocapture readonly %y) {
+define double @dotdq(ptr nocapture readonly %x, ptr nocapture readonly %y) {
 ; CHECK-LABEL: @dotdq(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x double>, <4 x double>* [[X:%.*]], align 32
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[Y:%.*]], align 32
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x double>, ptr [[X:%.*]], align 32
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr [[Y:%.*]], align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <4 x double> [[TMP1]], [[TMP0]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[TMP2]])
 ; CHECK-NEXT:    ret double [[TMP3]]
 ;
 entry:
-  %0 = load <4 x double>, <4 x double>* %x, align 32
-  %1 = load <4 x double>, <4 x double>* %y, align 32
+  %0 = load <4 x double>, ptr %x, align 32
+  %1 = load <4 x double>, ptr %y, align 32
   %vecext = extractelement <4 x double> %0, i32 0
   %vecext1 = extractelement <4 x double> %1, i32 0
   %mul = fmul fast double %vecext1, %vecext

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/rgb_phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/rgb_phi.ll
index 93948a3114b39..6b7b202e185e4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/rgb_phi.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/rgb_phi.ll
@@ -6,27 +6,26 @@ target triple = "i386-apple-macosx10.9.0"
 
 ; We disable the vectorization of <3 x float> for now
 
-; float foo(float *A) {
+; float foo(ptr A) {
 ;
 ;   float R = A[0];
 ;   float G = A[1];
 ;   float B = A[2];
 ;   for (int i=0; i < 121; i+=3) {
-;     R+=A[i+0]*7;
-;     G+=A[i+1]*8;
-;     B+=A[i+2]*9;
+;     R+=A[i+0]*7;
+;     G+=A[i+1]*8;
+;     B+=A[i+2]*9;
 ;   }
 ;
 ;   return R+G+B;
 ; }
 
-define float @foo(float* nocapture readonly %A) {
+define float @foo(ptr nocapture readonly %A) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[A:%.*]] to <2 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 2
-; CHECK-NEXT:    [[TMP2:%.*]] = load float, float* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
@@ -35,15 +34,15 @@ define float @foo(float* nocapture readonly %A) {
 ; CHECK-NEXT:    [[B_032:%.*]] = phi float [ [[TMP2]], [[ENTRY]] ], [ [[ADD14:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = phi <2 x float> [ [[TMP1]], [[ENTRY]] ], [ [[TMP11:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ]
 ; CHECK-NEXT:    [[TMP6:%.*]] = add nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP6]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load float, float* [[ARRAYIDX7]], align 4
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load float, ptr [[ARRAYIDX7]], align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i32 0
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x float> [[TMP8]], float [[TMP7]], i32 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = fmul <2 x float> [[TMP9]], <float 7.000000e+00, float 8.000000e+00>
 ; CHECK-NEXT:    [[TMP11]] = fadd <2 x float> [[TMP5]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = add nsw i64 [[INDVARS_IV]], 2
-; CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load float, float* [[ARRAYIDX12]], align 4
+; CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP12]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load float, ptr [[ARRAYIDX12]], align 4
 ; CHECK-NEXT:    [[MUL13:%.*]] = fmul float [[TMP13]], 9.000000e+00
 ; CHECK-NEXT:    [[ADD14]] = fadd float [[B_032]], [[MUL13]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 3
@@ -51,8 +50,8 @@ define float @foo(float* nocapture readonly %A) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP14]], 121
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]], label [[FOR_END:%.*]]
 ; CHECK:       for.body.for.body_crit_edge:
-; CHECK-NEXT:    [[ARRAYIDX3_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT]]
-; CHECK-NEXT:    [[DOTPRE]] = load float, float* [[ARRAYIDX3_PHI_TRANS_INSERT]], align 4
+; CHECK-NEXT:    [[ARRAYIDX3_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[DOTPRE]] = load float, ptr [[ARRAYIDX3_PHI_TRANS_INSERT]], align 4
 ; CHECK-NEXT:    br label [[FOR_BODY]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x float> [[TMP11]], i32 0
@@ -62,11 +61,11 @@ define float @foo(float* nocapture readonly %A) {
 ; CHECK-NEXT:    ret float [[ADD17]]
 ;
 entry:
-  %0 = load float, float* %A, align 4
-  %arrayidx1 = getelementptr inbounds float, float* %A, i64 1
-  %1 = load float, float* %arrayidx1, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %A, i64 2
-  %2 = load float, float* %arrayidx2, align 4
+  %0 = load float, ptr %A, align 4
+  %arrayidx1 = getelementptr inbounds float, ptr %A, i64 1
+  %1 = load float, ptr %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %A, i64 2
+  %2 = load float, ptr %arrayidx2, align 4
   br label %for.body
 
 for.body:                                         ; preds = %for.body.for.body_crit_edge, %entry
@@ -78,13 +77,13 @@ for.body:                                         ; preds = %for.body.for.body_c
   %mul = fmul float %3, 7.000000e+00
   %add4 = fadd float %R.030, %mul
   %4 = add nsw i64 %indvars.iv, 1
-  %arrayidx7 = getelementptr inbounds float, float* %A, i64 %4
-  %5 = load float, float* %arrayidx7, align 4
+  %arrayidx7 = getelementptr inbounds float, ptr %A, i64 %4
+  %5 = load float, ptr %arrayidx7, align 4
   %mul8 = fmul float %5, 8.000000e+00
   %add9 = fadd float %G.031, %mul8
   %6 = add nsw i64 %indvars.iv, 2
-  %arrayidx12 = getelementptr inbounds float, float* %A, i64 %6
-  %7 = load float, float* %arrayidx12, align 4
+  %arrayidx12 = getelementptr inbounds float, ptr %A, i64 %6
+  %7 = load float, ptr %arrayidx12, align 4
   %mul13 = fmul float %7, 9.000000e+00
   %add14 = fadd float %B.032, %mul13
   %indvars.iv.next = add i64 %indvars.iv, 3
@@ -93,8 +92,8 @@ for.body:                                         ; preds = %for.body.for.body_c
   br i1 %cmp, label %for.body.for.body_crit_edge, label %for.end
 
 for.body.for.body_crit_edge:                      ; preds = %for.body
-  %arrayidx3.phi.trans.insert = getelementptr inbounds float, float* %A, i64 %indvars.iv.next
-  %.pre = load float, float* %arrayidx3.phi.trans.insert, align 4
+  %arrayidx3.phi.trans.insert = getelementptr inbounds float, ptr %A, i64 %indvars.iv.next
+  %.pre = load float, ptr %arrayidx3.phi.trans.insert, align 4
   br label %for.body
 
 for.end:                                          ; preds = %for.body

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll b/llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll
index bcfec7f22e5f8..af323a81f152f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll
@@ -5,80 +5,75 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-macosx10.8.0"
 
 ; SLP vectorization example from http://cs.stanford.edu/people/eschkufz/research/asplos291-schkufza.pdf
-define void @SAXPY(i32* noalias nocapture %x, i32* noalias nocapture %y, i32 %a, i64 %i) {
+define void @SAXPY(ptr noalias nocapture %x, ptr noalias nocapture %y, i32 %a, i64 %i) {
 ; CHECK-LABEL: @SAXPY(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 [[I:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[Y:%.*]], i64 [[I]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i64 [[I:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i64 [[I]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = mul nsw <4 x i32> [[TMP4]], [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* [[TMP10]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP9]], ptr [[TMP1]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %1 = getelementptr inbounds i32, i32* %x, i64 %i
-  %2 = load i32, i32* %1, align 4
+  %1 = getelementptr inbounds i32, ptr %x, i64 %i
+  %2 = load i32, ptr %1, align 4
   %3 = mul nsw i32 %2, %a
-  %4 = getelementptr inbounds i32, i32* %y, i64 %i
-  %5 = load i32, i32* %4, align 4
+  %4 = getelementptr inbounds i32, ptr %y, i64 %i
+  %5 = load i32, ptr %4, align 4
   %6 = add nsw i32 %3, %5
-  store i32 %6, i32* %1, align 4
+  store i32 %6, ptr %1, align 4
   %7 = add i64 %i, 1
-  %8 = getelementptr inbounds i32, i32* %x, i64 %7
-  %9 = load i32, i32* %8, align 4
+  %8 = getelementptr inbounds i32, ptr %x, i64 %7
+  %9 = load i32, ptr %8, align 4
   %10 = mul nsw i32 %9, %a
-  %11 = getelementptr inbounds i32, i32* %y, i64 %7
-  %12 = load i32, i32* %11, align 4
+  %11 = getelementptr inbounds i32, ptr %y, i64 %7
+  %12 = load i32, ptr %11, align 4
   %13 = add nsw i32 %10, %12
-  store i32 %13, i32* %8, align 4
+  store i32 %13, ptr %8, align 4
   %14 = add i64 %i, 2
-  %15 = getelementptr inbounds i32, i32* %x, i64 %14
-  %16 = load i32, i32* %15, align 4
+  %15 = getelementptr inbounds i32, ptr %x, i64 %14
+  %16 = load i32, ptr %15, align 4
   %17 = mul nsw i32 %16, %a
-  %18 = getelementptr inbounds i32, i32* %y, i64 %14
-  %19 = load i32, i32* %18, align 4
+  %18 = getelementptr inbounds i32, ptr %y, i64 %14
+  %19 = load i32, ptr %18, align 4
   %20 = add nsw i32 %17, %19
-  store i32 %20, i32* %15, align 4
+  store i32 %20, ptr %15, align 4
   %21 = add i64 %i, 3
-  %22 = getelementptr inbounds i32, i32* %x, i64 %21
-  %23 = load i32, i32* %22, align 4
+  %22 = getelementptr inbounds i32, ptr %x, i64 %21
+  %23 = load i32, ptr %22, align 4
   %24 = mul nsw i32 %23, %a
-  %25 = getelementptr inbounds i32, i32* %y, i64 %21
-  %26 = load i32, i32* %25, align 4
+  %25 = getelementptr inbounds i32, ptr %y, i64 %21
+  %26 = load i32, ptr %25, align 4
   %27 = add nsw i32 %24, %26
-  store i32 %27, i32* %22, align 4
+  store i32 %27, ptr %22, align 4
   ret void
 }
 
 ; Make sure we don't crash on this one.
-define void @SAXPY_crash(i32* noalias nocapture %x, i32* noalias nocapture %y, i64 %i) {
+define void @SAXPY_crash(ptr noalias nocapture %x, ptr noalias nocapture %y, i64 %i) {
 ; CHECK-LABEL: @SAXPY_crash(
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[I:%.*]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[Y:%.*]], i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <2 x i32> undef, [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
-; CHECK-NEXT:    store <2 x i32> [[TMP6]], <2 x i32>* [[TMP7]], align 4
+; CHECK-NEXT:    store <2 x i32> [[TMP6]], ptr [[TMP2]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %1 = add i64 %i, 1
-  %2 = getelementptr inbounds i32, i32* %x, i64 %1
-  %3 = getelementptr inbounds i32, i32* %y, i64 %1
-  %4 = load i32, i32* %3, align 4
+  %2 = getelementptr inbounds i32, ptr %x, i64 %1
+  %3 = getelementptr inbounds i32, ptr %y, i64 %1
+  %4 = load i32, ptr %3, align 4
   %5 = add nsw i32 undef, %4
-  store i32 %5, i32* %2, align 4
+  store i32 %5, ptr %2, align 4
   %6 = add i64 %i, 2
-  %7 = getelementptr inbounds i32, i32* %x, i64 %6
-  %8 = getelementptr inbounds i32, i32* %y, i64 %6
-  %9 = load i32, i32* %8, align 4
+  %7 = getelementptr inbounds i32, ptr %x, i64 %6
+  %8 = getelementptr inbounds i32, ptr %y, i64 %6
+  %9 = load i32, ptr %8, align 4
   %10 = add nsw i32 undef, %9
-  store i32 %10, i32* %7, align 4
+  store i32 %10, ptr %7, align 4
   ret void
 }

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll b/llvm/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll
index 724101bd93233..f10e0a8408166 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll
@@ -10,40 +10,40 @@ target triple = "x86_64-unknown-linux-gnu"
 define i32 @slp_schedule_bundle() local_unnamed_addr #0 {
 ; CHECK-LABEL: @slp_schedule_bundle(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([1 x i32]* @b to <4 x i32>*), align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr @b, align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[TMP0]], <i32 31, i32 31, i32 31, i32 31>
 ; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([1 x i32]* @a to <4 x i32>*), align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr ([1 x i32], [1 x i32]* @b, i64 4, i64 0) to <2 x i32>*), align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr @a, align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr getelementptr ([1 x i32], ptr @b, i64 4, i64 0), align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = lshr <2 x i32> [[TMP3]], <i32 31, i32 31>
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor <2 x i32> [[TMP4]], <i32 1, i32 1>
-; CHECK-NEXT:    store <2 x i32> [[TMP5]], <2 x i32>* bitcast (i32* getelementptr ([1 x i32], [1 x i32]* @a, i64 4, i64 0) to <2 x i32>*), align 4
+; CHECK-NEXT:    store <2 x i32> [[TMP5]], ptr getelementptr ([1 x i32], ptr @a, i64 4, i64 0), align 4
 ; CHECK-NEXT:    ret i32 undef
 ;
 entry:
-  %0 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @b, i64 0, i64 0), align 4
+  %0 = load i32, ptr @b, align 4
   %.lobit = lshr i32 %0, 31
   %.lobit.not = xor i32 %.lobit, 1
-  store i32 %.lobit.not, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @a, i64 0, i64 0), align 4
-  %1 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @b, i64 1, i64 0), align 4
+  store i32 %.lobit.not, ptr @a, align 4
+  %1 = load i32, ptr getelementptr inbounds ([1 x i32], ptr @b, i64 1, i64 0), align 4
   %.lobit.1 = lshr i32 %1, 31
   %.lobit.not.1 = xor i32 %.lobit.1, 1
-  store i32 %.lobit.not.1, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @a, i64 1, i64 0), align 4
-  %2 = load i32, i32* getelementptr ([1 x i32], [1 x i32]* @b, i64 2, i64 0), align 4
+  store i32 %.lobit.not.1, ptr getelementptr inbounds ([1 x i32], ptr @a, i64 1, i64 0), align 4
+  %2 = load i32, ptr getelementptr ([1 x i32], ptr @b, i64 2, i64 0), align 4
   %.lobit.2 = lshr i32 %2, 31
   %.lobit.not.2 = xor i32 %.lobit.2, 1
-  store i32 %.lobit.not.2, i32* getelementptr ([1 x i32], [1 x i32]* @a, i64 2, i64 0), align 4
-  %3 = load i32, i32* getelementptr ([1 x i32], [1 x i32]* @b, i64 3, i64 0), align 4
+  store i32 %.lobit.not.2, ptr getelementptr ([1 x i32], ptr @a, i64 2, i64 0), align 4
+  %3 = load i32, ptr getelementptr ([1 x i32], ptr @b, i64 3, i64 0), align 4
   %.lobit.3 = lshr i32 %3, 31
   %.lobit.not.3 = xor i32 %.lobit.3, 1
-  store i32 %.lobit.not.3, i32* getelementptr ([1 x i32], [1 x i32]* @a, i64 3, i64 0), align 4
-  %4 = load i32, i32* getelementptr ([1 x i32], [1 x i32]* @b, i64 4, i64 0), align 4
+  store i32 %.lobit.not.3, ptr getelementptr ([1 x i32], ptr @a, i64 3, i64 0), align 4
+  %4 = load i32, ptr getelementptr ([1 x i32], ptr @b, i64 4, i64 0), align 4
   %.lobit.4 = lshr i32 %4, 31
   %.lobit.not.4 = xor i32 %.lobit.4, 1
-  store i32 %.lobit.not.4, i32* getelementptr ([1 x i32], [1 x i32]* @a, i64 4, i64 0), align 4
-  %5 = load i32, i32* getelementptr ([1 x i32], [1 x i32]* @b, i64 5, i64 0), align 4
+  store i32 %.lobit.not.4, ptr getelementptr ([1 x i32], ptr @a, i64 4, i64 0), align 4
+  %5 = load i32, ptr getelementptr ([1 x i32], ptr @b, i64 5, i64 0), align 4
   %.lobit.5 = lshr i32 %5, 31
   %.lobit.not.5 = xor i32 %.lobit.5, 1
-  store i32 %.lobit.not.5, i32* getelementptr ([1 x i32], [1 x i32]* @a, i64 5, i64 0), align 4
+  store i32 %.lobit.not.5, ptr getelementptr ([1 x i32], ptr @a, i64 5, i64 0), align 4
   ret i32 undef
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/schedule_budget.ll b/llvm/test/Transforms/SLPVectorizer/X86/schedule_budget.ll
index b1173f88f3246..c6d2e8d9286da 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/schedule_budget.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/schedule_budget.ll
@@ -9,11 +9,10 @@ target triple = "x86_64-apple-macosx10.9.0"
 
 declare void @unknown()
 
-define void @test(float * %a, float * %b, float * %c, float * %d) {
+define void @test(ptr %a, ptr %b, ptr %c, ptr %d) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[A:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    call void @unknown()
 ; CHECK-NEXT:    call void @unknown()
 ; CHECK-NEXT:    call void @unknown()
@@ -42,23 +41,20 @@ define void @test(float * %a, float * %b, float * %c, float * %d) {
 ; CHECK-NEXT:    call void @unknown()
 ; CHECK-NEXT:    call void @unknown()
 ; CHECK-NEXT:    call void @unknown()
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float>* [[TMP2]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[C:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast float* [[D:%.*]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[B:%.*]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, ptr [[C:%.*]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP4]], ptr [[D:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
   ; Don't vectorize these loads.
-  %l0 = load float, float* %a
-  %a1 = getelementptr inbounds float, float* %a, i64 1
-  %l1 = load float, float* %a1
-  %a2 = getelementptr inbounds float, float* %a, i64 2
-  %l2 = load float, float* %a2
-  %a3 = getelementptr inbounds float, float* %a, i64 3
-  %l3 = load float, float* %a3
+  %l0 = load float, ptr %a
+  %a1 = getelementptr inbounds float, ptr %a, i64 1
+  %l1 = load float, ptr %a1
+  %a2 = getelementptr inbounds float, ptr %a, i64 2
+  %l2 = load float, ptr %a2
+  %a3 = getelementptr inbounds float, ptr %a, i64 3
+  %l3 = load float, ptr %a3
 
   ; some unrelated instructions in between to enlarge the scheduling region
   call void @unknown()
@@ -91,31 +87,31 @@ entry:
   call void @unknown()
 
   ; Don't vectorize these stores because their operands are too far away.
-  store float %l0, float* %b
-  %b1 = getelementptr inbounds float, float* %b, i64 1
-  store float %l1, float* %b1
-  %b2 = getelementptr inbounds float, float* %b, i64 2
-  store float %l2, float* %b2
-  %b3 = getelementptr inbounds float, float* %b, i64 3
-  store float %l3, float* %b3
+  store float %l0, ptr %b
+  %b1 = getelementptr inbounds float, ptr %b, i64 1
+  store float %l1, ptr %b1
+  %b2 = getelementptr inbounds float, ptr %b, i64 2
+  store float %l2, ptr %b2
+  %b3 = getelementptr inbounds float, ptr %b, i64 3
+  store float %l3, ptr %b3
 
   ; But still vectorize the following instructions, because even if the budget
   ; is exceeded there is a minimum region size.
-  %l4 = load float, float* %c
-  %c1 = getelementptr inbounds float, float* %c, i64 1
-  %l5 = load float, float* %c1
-  %c2 = getelementptr inbounds float, float* %c, i64 2
-  %l6 = load float, float* %c2
-  %c3 = getelementptr inbounds float, float* %c, i64 3
-  %l7 = load float, float* %c3
+  %l4 = load float, ptr %c
+  %c1 = getelementptr inbounds float, ptr %c, i64 1
+  %l5 = load float, ptr %c1
+  %c2 = getelementptr inbounds float, ptr %c, i64 2
+  %l6 = load float, ptr %c2
+  %c3 = getelementptr inbounds float, ptr %c, i64 3
+  %l7 = load float, ptr %c3
 
-  store float %l4, float* %d
-  %d1 = getelementptr inbounds float, float* %d, i64 1
-  store float %l5, float* %d1
-  %d2 = getelementptr inbounds float, float* %d, i64 2
-  store float %l6, float* %d2
-  %d3 = getelementptr inbounds float, float* %d, i64 3
-  store float %l7, float* %d3
+  store float %l4, ptr %d
+  %d1 = getelementptr inbounds float, ptr %d, i64 1
+  store float %l5, ptr %d1
+  %d2 = getelementptr inbounds float, ptr %d, i64 2
+  store float %l6, ptr %d2
+  %d3 = getelementptr inbounds float, ptr %d, i64 3
+  store float %l7, ptr %d3
 
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll
index 8b74ade726853..87d2a00341563 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll
@@ -1,95 +1,88 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
 
-define i32 @foo(i32* nocapture readonly %diff) #0 {
+define i32 @foo(ptr nocapture readonly %diff) #0 {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[M2:%.*]] = alloca [8 x [8 x i32]], align 16
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast [8 x [8 x i32]]* [[M2]] to i8*
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[A_088:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDVARS_IV]], 3
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DIFF:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DIFF:%.*]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[TMP1]], 4
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[DIFF]], i64 [[TMP2]]
-; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[ARRAYIDX2]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[DIFF]], i64 [[TMP2]]
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [8 x [8 x i32]], ptr [[M2]], i64 0, i64 [[INDVARS_IV]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP4]]
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32* [[ARRAYIDX6]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 16
+; CHECK-NEXT:    store <4 x i32> [[TMP7]], ptr [[ARRAYIDX6]], align 16
 ; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
 ; CHECK-NEXT:    [[OP_RDX]] = add i32 [[TMP9]], [[A_088]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 0
-; CHECK-NEXT:    call void @ff([8 x i32]* [[ARRAYDECAY]])
+; CHECK-NEXT:    call void @ff(ptr [[M2]])
 ; CHECK-NEXT:    ret i32 [[OP_RDX]]
 ;
 entry:
   %m2 = alloca [8 x [8 x i32]], align 16
-  %0 = bitcast [8 x [8 x i32]]* %m2 to i8*
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %a.088 = phi i32 [ 0, %entry ], [ %add52, %for.body ]
-  %1 = shl i64 %indvars.iv, 3
-  %arrayidx = getelementptr inbounds i32, i32* %diff, i64 %1
-  %2 = load i32, i32* %arrayidx, align 4
-  %3 = or i64 %1, 4
-  %arrayidx2 = getelementptr inbounds i32, i32* %diff, i64 %3
-  %4 = load i32, i32* %arrayidx2, align 4
-  %add3 = add nsw i32 %4, %2
-  %arrayidx6 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 0
-  store i32 %add3, i32* %arrayidx6, align 16
+  %0 = shl i64 %indvars.iv, 3
+  %arrayidx = getelementptr inbounds i32, ptr %diff, i64 %0
+  %1 = load i32, ptr %arrayidx, align 4
+  %2 = or i64 %0, 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %diff, i64 %2
+  %3 = load i32, ptr %arrayidx2, align 4
+  %add3 = add nsw i32 %3, %1
+  %arrayidx6 = getelementptr inbounds [8 x [8 x i32]], ptr %m2, i64 0, i64 %indvars.iv, i64 0
+  store i32 %add3, ptr %arrayidx6, align 16
   %add10 = add nsw i32 %add3, %a.088
-  %5 = or i64 %1, 1
-  %arrayidx13 = getelementptr inbounds i32, i32* %diff, i64 %5
-  %6 = load i32, i32* %arrayidx13, align 4
-  %7 = or i64 %1, 5
-  %arrayidx16 = getelementptr inbounds i32, i32* %diff, i64 %7
-  %8 = load i32, i32* %arrayidx16, align 4
-  %add17 = add nsw i32 %8, %6
-  %arrayidx20 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 1
-  store i32 %add17, i32* %arrayidx20, align 4
+  %4 = or i64 %0, 1
+  %arrayidx13 = getelementptr inbounds i32, ptr %diff, i64 %4
+  %5 = load i32, ptr %arrayidx13, align 4
+  %6 = or i64 %0, 5
+  %arrayidx16 = getelementptr inbounds i32, ptr %diff, i64 %6
+  %7 = load i32, ptr %arrayidx16, align 4
+  %add17 = add nsw i32 %7, %5
+  %arrayidx20 = getelementptr inbounds [8 x [8 x i32]], ptr %m2, i64 0, i64 %indvars.iv, i64 1
+  store i32 %add17, ptr %arrayidx20, align 4
   %add24 = add nsw i32 %add10, %add17
-  %9 = or i64 %1, 2
-  %arrayidx27 = getelementptr inbounds i32, i32* %diff, i64 %9
-  %10 = load i32, i32* %arrayidx27, align 4
-  %11 = or i64 %1, 6
-  %arrayidx30 = getelementptr inbounds i32, i32* %diff, i64 %11
-  %12 = load i32, i32* %arrayidx30, align 4
-  %add31 = add nsw i32 %12, %10
-  %arrayidx34 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 2
-  store i32 %add31, i32* %arrayidx34, align 8
+  %8 = or i64 %0, 2
+  %arrayidx27 = getelementptr inbounds i32, ptr %diff, i64 %8
+  %9 = load i32, ptr %arrayidx27, align 4
+  %10 = or i64 %0, 6
+  %arrayidx30 = getelementptr inbounds i32, ptr %diff, i64 %10
+  %11 = load i32, ptr %arrayidx30, align 4
+  %add31 = add nsw i32 %11, %9
+  %arrayidx34 = getelementptr inbounds [8 x [8 x i32]], ptr %m2, i64 0, i64 %indvars.iv, i64 2
+  store i32 %add31, ptr %arrayidx34, align 8
   %add38 = add nsw i32 %add24, %add31
-  %13 = or i64 %1, 3
-  %arrayidx41 = getelementptr inbounds i32, i32* %diff, i64 %13
-  %14 = load i32, i32* %arrayidx41, align 4
-  %15 = or i64 %1, 7
-  %arrayidx44 = getelementptr inbounds i32, i32* %diff, i64 %15
-  %16 = load i32, i32* %arrayidx44, align 4
-  %add45 = add nsw i32 %16, %14
-  %arrayidx48 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 3
-  store i32 %add45, i32* %arrayidx48, align 4
+  %12 = or i64 %0, 3
+  %arrayidx41 = getelementptr inbounds i32, ptr %diff, i64 %12
+  %13 = load i32, ptr %arrayidx41, align 4
+  %14 = or i64 %0, 7
+  %arrayidx44 = getelementptr inbounds i32, ptr %diff, i64 %14
+  %15 = load i32, ptr %arrayidx44, align 4
+  %add45 = add nsw i32 %15, %13
+  %arrayidx48 = getelementptr inbounds [8 x [8 x i32]], ptr %m2, i64 0, i64 %indvars.iv, i64 3
+  store i32 %add45, ptr %arrayidx48, align 4
   %add52 = add nsw i32 %add38, %add45
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 8
   br i1 %exitcond, label %for.end, label %for.body
 
 for.end:                                          ; preds = %for.body
-  %arraydecay = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 0
-  call void @ff([8 x i32]* %arraydecay) #1
+  call void @ff(ptr %m2) #1
   ret i32 %add52
 }
 
-declare void @ff([8 x i32]*) #2
+declare void @ff(ptr) #2
 
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/several_store_chains.ll b/llvm/test/Transforms/SLPVectorizer/X86/several_store_chains.ll
index 5fa53a2d14ff8..32b322a8ae0fb 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/several_store_chains.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/several_store_chains.ll
@@ -2,39 +2,33 @@
 ; RUN: opt < %s -S -mtriple=x86_64-unknown -passes=slp-vectorizer | FileCheck %s
 
 ; Test for PR49898.
-define void @fusion_1506(i8* %temp_buf1) local_unnamed_addr {
+define void @fusion_1506(ptr %temp_buf1) local_unnamed_addr {
 ; CHECK-LABEL: @fusion_1506(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[TEMP_BUF1:%.*]], i64 5621415936
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TEMP_BUF1]], i64 7278166016
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TEMP_BUF1]], i64 5097127936
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP1]] to float*
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 undef
-; CHECK-NEXT:    store float undef, float* [[TMP5]], align 16
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[TMP0]] to float*
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP6]], i64 undef
-; CHECK-NEXT:    store float undef, float* [[TMP7]], align 16
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP6]], i64 undef
-; CHECK-NEXT:    store float undef, float* [[TMP8]], align 4
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP3]], i64 undef
-; CHECK-NEXT:    store float undef, float* [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[TEMP_BUF1:%.*]], i64 5621415936
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TEMP_BUF1]], i64 7278166016
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TEMP_BUF1]], i64 5097127936
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 undef
+; CHECK-NEXT:    store float undef, ptr [[TMP5]], align 16
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 undef
+; CHECK-NEXT:    store float undef, ptr [[TMP7]], align 16
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 undef
+; CHECK-NEXT:    store float undef, ptr [[TMP8]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 undef
+; CHECK-NEXT:    store float undef, ptr [[TMP9]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = getelementptr inbounds i8, i8* %temp_buf1, i64 5621415936
-  %1 = getelementptr inbounds i8, i8* %temp_buf1, i64 7278166016
-  %2 = getelementptr inbounds i8, i8* %temp_buf1, i64 5097127936
-  %3 = bitcast i8* %2 to float*
-  %4 = bitcast i8* %1 to float*
-  %5 = getelementptr inbounds float, float* %4, i64 undef
-  store float undef, float* %5, align 16
-  %6 = bitcast i8* %0 to float*
-  %7 = getelementptr inbounds float, float* %6, i64 undef
-  store float undef, float* %7, align 16
-  %8 = getelementptr inbounds float, float* %6, i64 undef
-  store float undef, float* %8, align 4
-  %9 = getelementptr inbounds float, float* %3, i64 undef
-  store float undef, float* %9, align 4
+  %0 = getelementptr inbounds i8, ptr %temp_buf1, i64 5621415936
+  %1 = getelementptr inbounds i8, ptr %temp_buf1, i64 7278166016
+  %2 = getelementptr inbounds i8, ptr %temp_buf1, i64 5097127936
+  %3 = getelementptr inbounds float, ptr %1, i64 undef
+  store float undef, ptr %3, align 16
+  %4 = getelementptr inbounds float, ptr %0, i64 undef
+  store float undef, ptr %4, align 16
+  %5 = getelementptr inbounds float, ptr %0, i64 undef
+  store float undef, ptr %5, align 4
+  %6 = getelementptr inbounds float, ptr %2, i64 undef
+  store float undef, ptr %6, align 4
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll
index 3c978f816a28a..5ae0ad932fddd 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll
@@ -10,11 +10,11 @@
 ; vXi8
 ;
 
-define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) {
+define <2 x i64> @loadext_2i8_to_2i64(ptr %p0) {
 ; SSE2-LABEL: @loadext_2i8_to_2i64(
-; SSE2-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
-; SSE2-NEXT:    [[I0:%.*]] = load i8, i8* [[P0]], align 1
-; SSE2-NEXT:    [[I1:%.*]] = load i8, i8* [[P1]], align 1
+; SSE2-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P0:%.*]], i64 1
+; SSE2-NEXT:    [[I0:%.*]] = load i8, ptr [[P0]], align 1
+; SSE2-NEXT:    [[I1:%.*]] = load i8, ptr [[P1]], align 1
 ; SSE2-NEXT:    [[X0:%.*]] = sext i8 [[I0]] to i64
 ; SSE2-NEXT:    [[X1:%.*]] = sext i8 [[I1]] to i64
 ; SSE2-NEXT:    [[V0:%.*]] = insertelement <2 x i64> poison, i64 [[X0]], i32 0
@@ -22,20 +22,18 @@ define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) {
 ; SSE2-NEXT:    ret <2 x i64> [[V1]]
 ;
 ; SLM-LABEL: @loadext_2i8_to_2i64(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <2 x i8>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
 ; SLM-NEXT:    ret <2 x i64> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_2i8_to_2i64(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <2 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
 ; AVX-NEXT:    ret <2 x i64> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
-  %i0 = load i8, i8* %p0, align 1
-  %i1 = load i8, i8* %p1, align 1
+  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
+  %i0 = load i8, ptr %p0, align 1
+  %i1 = load i8, ptr %p1, align 1
   %x0 = sext i8 %i0 to i64
   %x1 = sext i8 %i1 to i64
   %v0 = insertelement <2 x i64> poison, i64 %x0, i32 0
@@ -43,26 +41,24 @@ define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) {
   ret <2 x i64> %v1
 }
 
-define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) {
+define <4 x i32> @loadext_4i8_to_4i32(ptr %p0) {
 ; SSE-LABEL: @loadext_4i8_to_4i32(
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>
 ; SSE-NEXT:    ret <4 x i32> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_4i8_to_4i32(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>
 ; AVX-NEXT:    ret <4 x i32> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
-  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
-  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
-  %i0 = load i8, i8* %p0, align 1
-  %i1 = load i8, i8* %p1, align 1
-  %i2 = load i8, i8* %p2, align 1
-  %i3 = load i8, i8* %p3, align 1
+  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
+  %i0 = load i8, ptr %p0, align 1
+  %i1 = load i8, ptr %p1, align 1
+  %i2 = load i8, ptr %p2, align 1
+  %i3 = load i8, ptr %p3, align 1
   %x0 = sext i8 %i0 to i32
   %x1 = sext i8 %i1 to i32
   %x2 = sext i8 %i2 to i32
@@ -74,26 +70,24 @@ define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) {
   ret <4 x i32> %v3
 }
 
-define <4 x i64> @loadext_4i8_to_4i64(i8* %p0) {
+define <4 x i64> @loadext_4i8_to_4i64(ptr %p0) {
 ; SSE-LABEL: @loadext_4i8_to_4i64(
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64>
 ; SSE-NEXT:    ret <4 x i64> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_4i8_to_4i64(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64>
 ; AVX-NEXT:    ret <4 x i64> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
-  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
-  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
-  %i0 = load i8, i8* %p0, align 1
-  %i1 = load i8, i8* %p1, align 1
-  %i2 = load i8, i8* %p2, align 1
-  %i3 = load i8, i8* %p3, align 1
+  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
+  %i0 = load i8, ptr %p0, align 1
+  %i1 = load i8, ptr %p1, align 1
+  %i2 = load i8, ptr %p2, align 1
+  %i3 = load i8, ptr %p3, align 1
   %x0 = sext i8 %i0 to i64
   %x1 = sext i8 %i1 to i64
   %x2 = sext i8 %i2 to i64
@@ -105,34 +99,32 @@ define <4 x i64> @loadext_4i8_to_4i64(i8* %p0) {
   ret <4 x i64> %v3
 }
 
-define <8 x i16> @loadext_8i8_to_8i16(i8* %p0) {
+define <8 x i16> @loadext_8i8_to_8i16(ptr %p0) {
 ; SSE-LABEL: @loadext_8i8_to_8i16(
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16>
 ; SSE-NEXT:    ret <8 x i16> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_8i8_to_8i16(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16>
 ; AVX-NEXT:    ret <8 x i16> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
-  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
-  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
-  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
-  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
-  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
-  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
-  %i0 = load i8, i8* %p0, align 1
-  %i1 = load i8, i8* %p1, align 1
-  %i2 = load i8, i8* %p2, align 1
-  %i3 = load i8, i8* %p3, align 1
-  %i4 = load i8, i8* %p4, align 1
-  %i5 = load i8, i8* %p5, align 1
-  %i6 = load i8, i8* %p6, align 1
-  %i7 = load i8, i8* %p7, align 1
+  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
+  %p4 = getelementptr inbounds i8, ptr %p0, i64 4
+  %p5 = getelementptr inbounds i8, ptr %p0, i64 5
+  %p6 = getelementptr inbounds i8, ptr %p0, i64 6
+  %p7 = getelementptr inbounds i8, ptr %p0, i64 7
+  %i0 = load i8, ptr %p0, align 1
+  %i1 = load i8, ptr %p1, align 1
+  %i2 = load i8, ptr %p2, align 1
+  %i3 = load i8, ptr %p3, align 1
+  %i4 = load i8, ptr %p4, align 1
+  %i5 = load i8, ptr %p5, align 1
+  %i6 = load i8, ptr %p6, align 1
+  %i7 = load i8, ptr %p7, align 1
   %x0 = sext i8 %i0 to i16
   %x1 = sext i8 %i1 to i16
   %x2 = sext i8 %i2 to i16
@@ -152,34 +144,32 @@ define <8 x i16> @loadext_8i8_to_8i16(i8* %p0) {
   ret <8 x i16> %v7
 }
 
-define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) {
+define <8 x i32> @loadext_8i8_to_8i32(ptr %p0) {
 ; SSE-LABEL: @loadext_8i8_to_8i32(
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32>
 ; SSE-NEXT:    ret <8 x i32> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_8i8_to_8i32(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32>
 ; AVX-NEXT:    ret <8 x i32> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
-  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
-  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
-  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
-  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
-  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
-  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
-  %i0 = load i8, i8* %p0, align 1
-  %i1 = load i8, i8* %p1, align 1
-  %i2 = load i8, i8* %p2, align 1
-  %i3 = load i8, i8* %p3, align 1
-  %i4 = load i8, i8* %p4, align 1
-  %i5 = load i8, i8* %p5, align 1
-  %i6 = load i8, i8* %p6, align 1
-  %i7 = load i8, i8* %p7, align 1
+  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
+  %p4 = getelementptr inbounds i8, ptr %p0, i64 4
+  %p5 = getelementptr inbounds i8, ptr %p0, i64 5
+  %p6 = getelementptr inbounds i8, ptr %p0, i64 6
+  %p7 = getelementptr inbounds i8, ptr %p0, i64 7
+  %i0 = load i8, ptr %p0, align 1
+  %i1 = load i8, ptr %p1, align 1
+  %i2 = load i8, ptr %p2, align 1
+  %i3 = load i8, ptr %p3, align 1
+  %i4 = load i8, ptr %p4, align 1
+  %i5 = load i8, ptr %p5, align 1
+  %i6 = load i8, ptr %p6, align 1
+  %i7 = load i8, ptr %p7, align 1
   %x0 = sext i8 %i0 to i32
   %x1 = sext i8 %i1 to i32
   %x2 = sext i8 %i2 to i32
@@ -199,50 +189,48 @@ define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) {
   ret <8 x i32> %v7
 }
 
-define <16 x i16> @loadext_16i8_to_16i16(i8* %p0) {
+define <16 x i16> @loadext_16i8_to_16i16(ptr %p0) {
 ; SSE-LABEL: @loadext_16i8_to_16i16(
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = sext <16 x i8> [[TMP2]] to <16 x i16>
 ; SSE-NEXT:    ret <16 x i16> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_16i8_to_16i16(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <16 x i8> [[TMP2]] to <16 x i16>
 ; AVX-NEXT:    ret <16 x i16> [[TMP3]]
 ;
-  %p1  = getelementptr inbounds i8, i8* %p0, i64 1
-  %p2  = getelementptr inbounds i8, i8* %p0, i64 2
-  %p3  = getelementptr inbounds i8, i8* %p0, i64 3
-  %p4  = getelementptr inbounds i8, i8* %p0, i64 4
-  %p5  = getelementptr inbounds i8, i8* %p0, i64 5
-  %p6  = getelementptr inbounds i8, i8* %p0, i64 6
-  %p7  = getelementptr inbounds i8, i8* %p0, i64 7
-  %p8  = getelementptr inbounds i8, i8* %p0, i64 8
-  %p9  = getelementptr inbounds i8, i8* %p0, i64 9
-  %p10 = getelementptr inbounds i8, i8* %p0, i64 10
-  %p11 = getelementptr inbounds i8, i8* %p0, i64 11
-  %p12 = getelementptr inbounds i8, i8* %p0, i64 12
-  %p13 = getelementptr inbounds i8, i8* %p0, i64 13
-  %p14 = getelementptr inbounds i8, i8* %p0, i64 14
-  %p15 = getelementptr inbounds i8, i8* %p0, i64 15
-  %i0  = load i8, i8* %p0,  align 1
-  %i1  = load i8, i8* %p1,  align 1
-  %i2  = load i8, i8* %p2,  align 1
-  %i3  = load i8, i8* %p3,  align 1
-  %i4  = load i8, i8* %p4,  align 1
-  %i5  = load i8, i8* %p5,  align 1
-  %i6  = load i8, i8* %p6,  align 1
-  %i7  = load i8, i8* %p7,  align 1
-  %i8  = load i8, i8* %p8,  align 1
-  %i9  = load i8, i8* %p9,  align 1
-  %i10 = load i8, i8* %p10, align 1
-  %i11 = load i8, i8* %p11, align 1
-  %i12 = load i8, i8* %p12, align 1
-  %i13 = load i8, i8* %p13, align 1
-  %i14 = load i8, i8* %p14, align 1
-  %i15 = load i8, i8* %p15, align 1
+  %p1  = getelementptr inbounds i8, ptr %p0, i64 1
+  %p2  = getelementptr inbounds i8, ptr %p0, i64 2
+  %p3  = getelementptr inbounds i8, ptr %p0, i64 3
+  %p4  = getelementptr inbounds i8, ptr %p0, i64 4
+  %p5  = getelementptr inbounds i8, ptr %p0, i64 5
+  %p6  = getelementptr inbounds i8, ptr %p0, i64 6
+  %p7  = getelementptr inbounds i8, ptr %p0, i64 7
+  %p8  = getelementptr inbounds i8, ptr %p0, i64 8
+  %p9  = getelementptr inbounds i8, ptr %p0, i64 9
+  %p10 = getelementptr inbounds i8, ptr %p0, i64 10
+  %p11 = getelementptr inbounds i8, ptr %p0, i64 11
+  %p12 = getelementptr inbounds i8, ptr %p0, i64 12
+  %p13 = getelementptr inbounds i8, ptr %p0, i64 13
+  %p14 = getelementptr inbounds i8, ptr %p0, i64 14
+  %p15 = getelementptr inbounds i8, ptr %p0, i64 15
+  %i0  = load i8, ptr %p0,  align 1
+  %i1  = load i8, ptr %p1,  align 1
+  %i2  = load i8, ptr %p2,  align 1
+  %i3  = load i8, ptr %p3,  align 1
+  %i4  = load i8, ptr %p4,  align 1
+  %i5  = load i8, ptr %p5,  align 1
+  %i6  = load i8, ptr %p6,  align 1
+  %i7  = load i8, ptr %p7,  align 1
+  %i8  = load i8, ptr %p8,  align 1
+  %i9  = load i8, ptr %p9,  align 1
+  %i10 = load i8, ptr %p10, align 1
+  %i11 = load i8, ptr %p11, align 1
+  %i12 = load i8, ptr %p12, align 1
+  %i13 = load i8, ptr %p13, align 1
+  %i14 = load i8, ptr %p14, align 1
+  %i15 = load i8, ptr %p15, align 1
   %x0  = sext i8 %i0  to i16
   %x1  = sext i8 %i1  to i16
   %x2  = sext i8 %i2  to i16
@@ -282,22 +270,20 @@ define <16 x i16> @loadext_16i8_to_16i16(i8* %p0) {
 ; vXi16
 ;
 
-define <2 x i64> @loadext_2i16_to_2i64(i16* %p0) {
+define <2 x i64> @loadext_2i16_to_2i64(ptr %p0) {
 ; SSE-LABEL: @loadext_2i16_to_2i64(
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i16>, ptr [[P0:%.*]], align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64>
 ; SSE-NEXT:    ret <2 x i64> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_2i16_to_2i64(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i16>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64>
 ; AVX-NEXT:    ret <2 x i64> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
-  %i0 = load i16, i16* %p0, align 1
-  %i1 = load i16, i16* %p1, align 1
+  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
+  %i0 = load i16, ptr %p0, align 1
+  %i1 = load i16, ptr %p1, align 1
   %x0 = sext i16 %i0 to i64
   %x1 = sext i16 %i1 to i64
   %v0 = insertelement <2 x i64> poison, i64 %x0, i32 0
@@ -305,26 +291,24 @@ define <2 x i64> @loadext_2i16_to_2i64(i16* %p0) {
   ret <2 x i64> %v1
 }
 
-define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) {
+define <4 x i32> @loadext_4i16_to_4i32(ptr %p0) {
 ; SSE-LABEL: @loadext_4i16_to_4i32(
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32>
 ; SSE-NEXT:    ret <4 x i32> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_4i16_to_4i32(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32>
 ; AVX-NEXT:    ret <4 x i32> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
-  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
-  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
-  %i0 = load i16, i16* %p0, align 1
-  %i1 = load i16, i16* %p1, align 1
-  %i2 = load i16, i16* %p2, align 1
-  %i3 = load i16, i16* %p3, align 1
+  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
+  %i0 = load i16, ptr %p0, align 1
+  %i1 = load i16, ptr %p1, align 1
+  %i2 = load i16, ptr %p2, align 1
+  %i3 = load i16, ptr %p3, align 1
   %x0 = sext i16 %i0 to i32
   %x1 = sext i16 %i1 to i32
   %x2 = sext i16 %i2 to i32
@@ -336,26 +320,24 @@ define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) {
   ret <4 x i32> %v3
 }
 
-define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) {
+define <4 x i64> @loadext_4i16_to_4i64(ptr %p0) {
 ; SSE-LABEL: @loadext_4i16_to_4i64(
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
 ; SSE-NEXT:    ret <4 x i64> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_4i16_to_4i64(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
 ; AVX-NEXT:    ret <4 x i64> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
-  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
-  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
-  %i0 = load i16, i16* %p0, align 1
-  %i1 = load i16, i16* %p1, align 1
-  %i2 = load i16, i16* %p2, align 1
-  %i3 = load i16, i16* %p3, align 1
+  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
+  %i0 = load i16, ptr %p0, align 1
+  %i1 = load i16, ptr %p1, align 1
+  %i2 = load i16, ptr %p2, align 1
+  %i3 = load i16, ptr %p3, align 1
   %x0 = sext i16 %i0 to i64
   %x1 = sext i16 %i1 to i64
   %x2 = sext i16 %i2 to i64
@@ -367,34 +349,32 @@ define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) {
   ret <4 x i64> %v3
 }
 
-define <8 x i32> @loadext_8i16_to_8i32(i16* %p0) {
+define <8 x i32> @loadext_8i16_to_8i32(ptr %p0) {
 ; SSE-LABEL: @loadext_8i16_to_8i32(
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
 ; SSE-NEXT:    ret <8 x i32> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_8i16_to_8i32(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
 ; AVX-NEXT:    ret <8 x i32> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
-  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
-  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
-  %p4 = getelementptr inbounds i16, i16* %p0, i64 4
-  %p5 = getelementptr inbounds i16, i16* %p0, i64 5
-  %p6 = getelementptr inbounds i16, i16* %p0, i64 6
-  %p7 = getelementptr inbounds i16, i16* %p0, i64 7
-  %i0 = load i16, i16* %p0, align 1
-  %i1 = load i16, i16* %p1, align 1
-  %i2 = load i16, i16* %p2, align 1
-  %i3 = load i16, i16* %p3, align 1
-  %i4 = load i16, i16* %p4, align 1
-  %i5 = load i16, i16* %p5, align 1
-  %i6 = load i16, i16* %p6, align 1
-  %i7 = load i16, i16* %p7, align 1
+  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
+  %p4 = getelementptr inbounds i16, ptr %p0, i64 4
+  %p5 = getelementptr inbounds i16, ptr %p0, i64 5
+  %p6 = getelementptr inbounds i16, ptr %p0, i64 6
+  %p7 = getelementptr inbounds i16, ptr %p0, i64 7
+  %i0 = load i16, ptr %p0, align 1
+  %i1 = load i16, ptr %p1, align 1
+  %i2 = load i16, ptr %p2, align 1
+  %i3 = load i16, ptr %p3, align 1
+  %i4 = load i16, ptr %p4, align 1
+  %i5 = load i16, ptr %p5, align 1
+  %i6 = load i16, ptr %p6, align 1
+  %i7 = load i16, ptr %p7, align 1
   %x0 = sext i16 %i0 to i32
   %x1 = sext i16 %i1 to i32
   %x2 = sext i16 %i2 to i32
@@ -418,22 +398,20 @@ define <8 x i32> @loadext_8i16_to_8i32(i16* %p0) {
 ; vXi32
 ;
 
-define <2 x i64> @loadext_2i32_to_2i64(i32* %p0) {
+define <2 x i64> @loadext_2i32_to_2i64(ptr %p0) {
 ; SSE-LABEL: @loadext_2i32_to_2i64(
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[P0:%.*]], align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64>
 ; SSE-NEXT:    ret <2 x i64> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_2i32_to_2i64(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64>
 ; AVX-NEXT:    ret <2 x i64> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i32, i32* %p0, i64 1
-  %i0 = load i32, i32* %p0, align 1
-  %i1 = load i32, i32* %p1, align 1
+  %p1 = getelementptr inbounds i32, ptr %p0, i64 1
+  %i0 = load i32, ptr %p0, align 1
+  %i1 = load i32, ptr %p1, align 1
   %x0 = sext i32 %i0 to i64
   %x1 = sext i32 %i1 to i64
   %v0 = insertelement <2 x i64> poison, i64 %x0, i32 0
@@ -441,26 +419,24 @@ define <2 x i64> @loadext_2i32_to_2i64(i32* %p0) {
   ret <2 x i64> %v1
 }
 
-define <4 x i64> @loadext_4i32_to_4i64(i32* %p0) {
+define <4 x i64> @loadext_4i32_to_4i64(ptr %p0) {
 ; SSE-LABEL: @loadext_4i32_to_4i64(
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[P0:%.*]], align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
 ; SSE-NEXT:    ret <4 x i64> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_4i32_to_4i64(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
 ; AVX-NEXT:    ret <4 x i64> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i32, i32* %p0, i64 1
-  %p2 = getelementptr inbounds i32, i32* %p0, i64 2
-  %p3 = getelementptr inbounds i32, i32* %p0, i64 3
-  %i0 = load i32, i32* %p0, align 1
-  %i1 = load i32, i32* %p1, align 1
-  %i2 = load i32, i32* %p2, align 1
-  %i3 = load i32, i32* %p3, align 1
+  %p1 = getelementptr inbounds i32, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i32, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i32, ptr %p0, i64 3
+  %i0 = load i32, ptr %p0, align 1
+  %i1 = load i32, ptr %p1, align 1
+  %i2 = load i32, ptr %p2, align 1
+  %i3 = load i32, ptr %p3, align 1
   %x0 = sext i32 %i0 to i64
   %x1 = sext i32 %i1 to i64
   %x2 = sext i32 %i2 to i64

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sext.ll b/llvm/test/Transforms/SLPVectorizer/X86/sext.ll
index dcdbb9453734d..7d38aeb0c3635 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/sext.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/sext.ll
@@ -10,11 +10,11 @@
 ; vXi8
 ;
 
-define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) {
+define <2 x i64> @loadext_2i8_to_2i64(ptr %p0) {
 ; SSE2-LABEL: @loadext_2i8_to_2i64(
-; SSE2-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
-; SSE2-NEXT:    [[I0:%.*]] = load i8, i8* [[P0]], align 1
-; SSE2-NEXT:    [[I1:%.*]] = load i8, i8* [[P1]], align 1
+; SSE2-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P0:%.*]], i64 1
+; SSE2-NEXT:    [[I0:%.*]] = load i8, ptr [[P0]], align 1
+; SSE2-NEXT:    [[I1:%.*]] = load i8, ptr [[P1]], align 1
 ; SSE2-NEXT:    [[X0:%.*]] = sext i8 [[I0]] to i64
 ; SSE2-NEXT:    [[X1:%.*]] = sext i8 [[I1]] to i64
 ; SSE2-NEXT:    [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0
@@ -22,20 +22,18 @@ define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) {
 ; SSE2-NEXT:    ret <2 x i64> [[V1]]
 ;
 ; SLM-LABEL: @loadext_2i8_to_2i64(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <2 x i8>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
 ; SLM-NEXT:    ret <2 x i64> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_2i8_to_2i64(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <2 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
 ; AVX-NEXT:    ret <2 x i64> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
-  %i0 = load i8, i8* %p0, align 1
-  %i1 = load i8, i8* %p1, align 1
+  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
+  %i0 = load i8, ptr %p0, align 1
+  %i1 = load i8, ptr %p1, align 1
   %x0 = sext i8 %i0 to i64
   %x1 = sext i8 %i1 to i64
   %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
@@ -43,26 +41,24 @@ define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) {
   ret <2 x i64> %v1
 }
 
-define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) {
+define <4 x i32> @loadext_4i8_to_4i32(ptr %p0) {
 ; SSE-LABEL: @loadext_4i8_to_4i32(
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>
 ; SSE-NEXT:    ret <4 x i32> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_4i8_to_4i32(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>
 ; AVX-NEXT:    ret <4 x i32> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
-  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
-  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
-  %i0 = load i8, i8* %p0, align 1
-  %i1 = load i8, i8* %p1, align 1
-  %i2 = load i8, i8* %p2, align 1
-  %i3 = load i8, i8* %p3, align 1
+  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
+  %i0 = load i8, ptr %p0, align 1
+  %i1 = load i8, ptr %p1, align 1
+  %i2 = load i8, ptr %p2, align 1
+  %i3 = load i8, ptr %p3, align 1
   %x0 = sext i8 %i0 to i32
   %x1 = sext i8 %i1 to i32
   %x2 = sext i8 %i2 to i32
@@ -74,26 +70,24 @@ define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) {
   ret <4 x i32> %v3
 }
 
-define <4 x i64> @loadext_4i8_to_4i64(i8* %p0) {
+define <4 x i64> @loadext_4i8_to_4i64(ptr %p0) {
 ; SSE-LABEL: @loadext_4i8_to_4i64(
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64>
 ; SSE-NEXT:    ret <4 x i64> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_4i8_to_4i64(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64>
 ; AVX-NEXT:    ret <4 x i64> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
-  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
-  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
-  %i0 = load i8, i8* %p0, align 1
-  %i1 = load i8, i8* %p1, align 1
-  %i2 = load i8, i8* %p2, align 1
-  %i3 = load i8, i8* %p3, align 1
+  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
+  %i0 = load i8, ptr %p0, align 1
+  %i1 = load i8, ptr %p1, align 1
+  %i2 = load i8, ptr %p2, align 1
+  %i3 = load i8, ptr %p3, align 1
   %x0 = sext i8 %i0 to i64
   %x1 = sext i8 %i1 to i64
   %x2 = sext i8 %i2 to i64
@@ -105,34 +99,32 @@ define <4 x i64> @loadext_4i8_to_4i64(i8* %p0) {
   ret <4 x i64> %v3
 }
 
-define <8 x i16> @loadext_8i8_to_8i16(i8* %p0) {
+define <8 x i16> @loadext_8i8_to_8i16(ptr %p0) {
 ; SSE-LABEL: @loadext_8i8_to_8i16(
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16>
 ; SSE-NEXT:    ret <8 x i16> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_8i8_to_8i16(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16>
 ; AVX-NEXT:    ret <8 x i16> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
-  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
-  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
-  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
-  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
-  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
-  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
-  %i0 = load i8, i8* %p0, align 1
-  %i1 = load i8, i8* %p1, align 1
-  %i2 = load i8, i8* %p2, align 1
-  %i3 = load i8, i8* %p3, align 1
-  %i4 = load i8, i8* %p4, align 1
-  %i5 = load i8, i8* %p5, align 1
-  %i6 = load i8, i8* %p6, align 1
-  %i7 = load i8, i8* %p7, align 1
+  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
+  %p4 = getelementptr inbounds i8, ptr %p0, i64 4
+  %p5 = getelementptr inbounds i8, ptr %p0, i64 5
+  %p6 = getelementptr inbounds i8, ptr %p0, i64 6
+  %p7 = getelementptr inbounds i8, ptr %p0, i64 7
+  %i0 = load i8, ptr %p0, align 1
+  %i1 = load i8, ptr %p1, align 1
+  %i2 = load i8, ptr %p2, align 1
+  %i3 = load i8, ptr %p3, align 1
+  %i4 = load i8, ptr %p4, align 1
+  %i5 = load i8, ptr %p5, align 1
+  %i6 = load i8, ptr %p6, align 1
+  %i7 = load i8, ptr %p7, align 1
   %x0 = sext i8 %i0 to i16
   %x1 = sext i8 %i1 to i16
   %x2 = sext i8 %i2 to i16
@@ -152,34 +144,32 @@ define <8 x i16> @loadext_8i8_to_8i16(i8* %p0) {
   ret <8 x i16> %v7
 }
 
-define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) {
+define <8 x i32> @loadext_8i8_to_8i32(ptr %p0) {
 ; SSE-LABEL: @loadext_8i8_to_8i32(
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32>
 ; SSE-NEXT:    ret <8 x i32> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_8i8_to_8i32(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32>
 ; AVX-NEXT:    ret <8 x i32> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
-  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
-  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
-  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
-  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
-  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
-  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
-  %i0 = load i8, i8* %p0, align 1
-  %i1 = load i8, i8* %p1, align 1
-  %i2 = load i8, i8* %p2, align 1
-  %i3 = load i8, i8* %p3, align 1
-  %i4 = load i8, i8* %p4, align 1
-  %i5 = load i8, i8* %p5, align 1
-  %i6 = load i8, i8* %p6, align 1
-  %i7 = load i8, i8* %p7, align 1
+  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
+  %p4 = getelementptr inbounds i8, ptr %p0, i64 4
+  %p5 = getelementptr inbounds i8, ptr %p0, i64 5
+  %p6 = getelementptr inbounds i8, ptr %p0, i64 6
+  %p7 = getelementptr inbounds i8, ptr %p0, i64 7
+  %i0 = load i8, ptr %p0, align 1
+  %i1 = load i8, ptr %p1, align 1
+  %i2 = load i8, ptr %p2, align 1
+  %i3 = load i8, ptr %p3, align 1
+  %i4 = load i8, ptr %p4, align 1
+  %i5 = load i8, ptr %p5, align 1
+  %i6 = load i8, ptr %p6, align 1
+  %i7 = load i8, ptr %p7, align 1
   %x0 = sext i8 %i0 to i32
   %x1 = sext i8 %i1 to i32
   %x2 = sext i8 %i2 to i32
@@ -199,50 +189,48 @@ define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) {
   ret <8 x i32> %v7
 }
 
-define <16 x i16> @loadext_16i8_to_16i16(i8* %p0) {
+define <16 x i16> @loadext_16i8_to_16i16(ptr %p0) {
 ; SSE-LABEL: @loadext_16i8_to_16i16(
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = sext <16 x i8> [[TMP2]] to <16 x i16>
 ; SSE-NEXT:    ret <16 x i16> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_16i8_to_16i16(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <16 x i8> [[TMP2]] to <16 x i16>
 ; AVX-NEXT:    ret <16 x i16> [[TMP3]]
 ;
-  %p1  = getelementptr inbounds i8, i8* %p0, i64 1
-  %p2  = getelementptr inbounds i8, i8* %p0, i64 2
-  %p3  = getelementptr inbounds i8, i8* %p0, i64 3
-  %p4  = getelementptr inbounds i8, i8* %p0, i64 4
-  %p5  = getelementptr inbounds i8, i8* %p0, i64 5
-  %p6  = getelementptr inbounds i8, i8* %p0, i64 6
-  %p7  = getelementptr inbounds i8, i8* %p0, i64 7
-  %p8  = getelementptr inbounds i8, i8* %p0, i64 8
-  %p9  = getelementptr inbounds i8, i8* %p0, i64 9
-  %p10 = getelementptr inbounds i8, i8* %p0, i64 10
-  %p11 = getelementptr inbounds i8, i8* %p0, i64 11
-  %p12 = getelementptr inbounds i8, i8* %p0, i64 12
-  %p13 = getelementptr inbounds i8, i8* %p0, i64 13
-  %p14 = getelementptr inbounds i8, i8* %p0, i64 14
-  %p15 = getelementptr inbounds i8, i8* %p0, i64 15
-  %i0  = load i8, i8* %p0,  align 1
-  %i1  = load i8, i8* %p1,  align 1
-  %i2  = load i8, i8* %p2,  align 1
-  %i3  = load i8, i8* %p3,  align 1
-  %i4  = load i8, i8* %p4,  align 1
-  %i5  = load i8, i8* %p5,  align 1
-  %i6  = load i8, i8* %p6,  align 1
-  %i7  = load i8, i8* %p7,  align 1
-  %i8  = load i8, i8* %p8,  align 1
-  %i9  = load i8, i8* %p9,  align 1
-  %i10 = load i8, i8* %p10, align 1
-  %i11 = load i8, i8* %p11, align 1
-  %i12 = load i8, i8* %p12, align 1
-  %i13 = load i8, i8* %p13, align 1
-  %i14 = load i8, i8* %p14, align 1
-  %i15 = load i8, i8* %p15, align 1
+  %p1  = getelementptr inbounds i8, ptr %p0, i64 1
+  %p2  = getelementptr inbounds i8, ptr %p0, i64 2
+  %p3  = getelementptr inbounds i8, ptr %p0, i64 3
+  %p4  = getelementptr inbounds i8, ptr %p0, i64 4
+  %p5  = getelementptr inbounds i8, ptr %p0, i64 5
+  %p6  = getelementptr inbounds i8, ptr %p0, i64 6
+  %p7  = getelementptr inbounds i8, ptr %p0, i64 7
+  %p8  = getelementptr inbounds i8, ptr %p0, i64 8
+  %p9  = getelementptr inbounds i8, ptr %p0, i64 9
+  %p10 = getelementptr inbounds i8, ptr %p0, i64 10
+  %p11 = getelementptr inbounds i8, ptr %p0, i64 11
+  %p12 = getelementptr inbounds i8, ptr %p0, i64 12
+  %p13 = getelementptr inbounds i8, ptr %p0, i64 13
+  %p14 = getelementptr inbounds i8, ptr %p0, i64 14
+  %p15 = getelementptr inbounds i8, ptr %p0, i64 15
+  %i0  = load i8, ptr %p0,  align 1
+  %i1  = load i8, ptr %p1,  align 1
+  %i2  = load i8, ptr %p2,  align 1
+  %i3  = load i8, ptr %p3,  align 1
+  %i4  = load i8, ptr %p4,  align 1
+  %i5  = load i8, ptr %p5,  align 1
+  %i6  = load i8, ptr %p6,  align 1
+  %i7  = load i8, ptr %p7,  align 1
+  %i8  = load i8, ptr %p8,  align 1
+  %i9  = load i8, ptr %p9,  align 1
+  %i10 = load i8, ptr %p10, align 1
+  %i11 = load i8, ptr %p11, align 1
+  %i12 = load i8, ptr %p12, align 1
+  %i13 = load i8, ptr %p13, align 1
+  %i14 = load i8, ptr %p14, align 1
+  %i15 = load i8, ptr %p15, align 1
   %x0  = sext i8 %i0  to i16
   %x1  = sext i8 %i1  to i16
   %x2  = sext i8 %i2  to i16
@@ -282,22 +270,20 @@ define <16 x i16> @loadext_16i8_to_16i16(i8* %p0) {
 ; vXi16
 ;
 
-define <2 x i64> @loadext_2i16_to_2i64(i16* %p0) {
+define <2 x i64> @loadext_2i16_to_2i64(ptr %p0) {
 ; SSE-LABEL: @loadext_2i16_to_2i64(
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i16>, ptr [[P0:%.*]], align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64>
 ; SSE-NEXT:    ret <2 x i64> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_2i16_to_2i64(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i16>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64>
 ; AVX-NEXT:    ret <2 x i64> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
-  %i0 = load i16, i16* %p0, align 1
-  %i1 = load i16, i16* %p1, align 1
+  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
+  %i0 = load i16, ptr %p0, align 1
+  %i1 = load i16, ptr %p1, align 1
   %x0 = sext i16 %i0 to i64
   %x1 = sext i16 %i1 to i64
   %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
@@ -305,26 +291,24 @@ define <2 x i64> @loadext_2i16_to_2i64(i16* %p0) {
   ret <2 x i64> %v1
 }
 
-define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) {
+define <4 x i32> @loadext_4i16_to_4i32(ptr %p0) {
 ; SSE-LABEL: @loadext_4i16_to_4i32(
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32>
 ; SSE-NEXT:    ret <4 x i32> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_4i16_to_4i32(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32>
 ; AVX-NEXT:    ret <4 x i32> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
-  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
-  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
-  %i0 = load i16, i16* %p0, align 1
-  %i1 = load i16, i16* %p1, align 1
-  %i2 = load i16, i16* %p2, align 1
-  %i3 = load i16, i16* %p3, align 1
+  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
+  %i0 = load i16, ptr %p0, align 1
+  %i1 = load i16, ptr %p1, align 1
+  %i2 = load i16, ptr %p2, align 1
+  %i3 = load i16, ptr %p3, align 1
   %x0 = sext i16 %i0 to i32
   %x1 = sext i16 %i1 to i32
   %x2 = sext i16 %i2 to i32
@@ -336,26 +320,24 @@ define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) {
   ret <4 x i32> %v3
 }
 
-define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) {
+define <4 x i64> @loadext_4i16_to_4i64(ptr %p0) {
 ; SSE-LABEL: @loadext_4i16_to_4i64(
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
 ; SSE-NEXT:    ret <4 x i64> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_4i16_to_4i64(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
 ; AVX-NEXT:    ret <4 x i64> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
-  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
-  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
-  %i0 = load i16, i16* %p0, align 1
-  %i1 = load i16, i16* %p1, align 1
-  %i2 = load i16, i16* %p2, align 1
-  %i3 = load i16, i16* %p3, align 1
+  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
+  %i0 = load i16, ptr %p0, align 1
+  %i1 = load i16, ptr %p1, align 1
+  %i2 = load i16, ptr %p2, align 1
+  %i3 = load i16, ptr %p3, align 1
   %x0 = sext i16 %i0 to i64
   %x1 = sext i16 %i1 to i64
   %x2 = sext i16 %i2 to i64
@@ -367,34 +349,32 @@ define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) {
   ret <4 x i64> %v3
 }
 
-define <8 x i32> @loadext_8i16_to_8i32(i16* %p0) {
+define <8 x i32> @loadext_8i16_to_8i32(ptr %p0) {
 ; SSE-LABEL: @loadext_8i16_to_8i32(
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
 ; SSE-NEXT:    ret <8 x i32> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_8i16_to_8i32(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
 ; AVX-NEXT:    ret <8 x i32> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
-  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
-  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
-  %p4 = getelementptr inbounds i16, i16* %p0, i64 4
-  %p5 = getelementptr inbounds i16, i16* %p0, i64 5
-  %p6 = getelementptr inbounds i16, i16* %p0, i64 6
-  %p7 = getelementptr inbounds i16, i16* %p0, i64 7
-  %i0 = load i16, i16* %p0, align 1
-  %i1 = load i16, i16* %p1, align 1
-  %i2 = load i16, i16* %p2, align 1
-  %i3 = load i16, i16* %p3, align 1
-  %i4 = load i16, i16* %p4, align 1
-  %i5 = load i16, i16* %p5, align 1
-  %i6 = load i16, i16* %p6, align 1
-  %i7 = load i16, i16* %p7, align 1
+  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
+  %p4 = getelementptr inbounds i16, ptr %p0, i64 4
+  %p5 = getelementptr inbounds i16, ptr %p0, i64 5
+  %p6 = getelementptr inbounds i16, ptr %p0, i64 6
+  %p7 = getelementptr inbounds i16, ptr %p0, i64 7
+  %i0 = load i16, ptr %p0, align 1
+  %i1 = load i16, ptr %p1, align 1
+  %i2 = load i16, ptr %p2, align 1
+  %i3 = load i16, ptr %p3, align 1
+  %i4 = load i16, ptr %p4, align 1
+  %i5 = load i16, ptr %p5, align 1
+  %i6 = load i16, ptr %p6, align 1
+  %i7 = load i16, ptr %p7, align 1
   %x0 = sext i16 %i0 to i32
   %x1 = sext i16 %i1 to i32
   %x2 = sext i16 %i2 to i32
@@ -418,22 +398,20 @@ define <8 x i32> @loadext_8i16_to_8i32(i16* %p0) {
 ; vXi32
 ;
 
-define <2 x i64> @loadext_2i32_to_2i64(i32* %p0) {
+define <2 x i64> @loadext_2i32_to_2i64(ptr %p0) {
 ; SSE-LABEL: @loadext_2i32_to_2i64(
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[P0:%.*]], align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64>
 ; SSE-NEXT:    ret <2 x i64> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_2i32_to_2i64(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64>
 ; AVX-NEXT:    ret <2 x i64> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i32, i32* %p0, i64 1
-  %i0 = load i32, i32* %p0, align 1
-  %i1 = load i32, i32* %p1, align 1
+  %p1 = getelementptr inbounds i32, ptr %p0, i64 1
+  %i0 = load i32, ptr %p0, align 1
+  %i1 = load i32, ptr %p1, align 1
   %x0 = sext i32 %i0 to i64
   %x1 = sext i32 %i1 to i64
   %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
@@ -441,26 +419,24 @@ define <2 x i64> @loadext_2i32_to_2i64(i32* %p0) {
   ret <2 x i64> %v1
 }
 
-define <4 x i64> @loadext_4i32_to_4i64(i32* %p0) {
+define <4 x i64> @loadext_4i32_to_4i64(ptr %p0) {
 ; SSE-LABEL: @loadext_4i32_to_4i64(
-; SSE-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>*
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[P0:%.*]], align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
 ; SSE-NEXT:    ret <4 x i64> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_4i32_to_4i64(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
 ; AVX-NEXT:    ret <4 x i64> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i32, i32* %p0, i64 1
-  %p2 = getelementptr inbounds i32, i32* %p0, i64 2
-  %p3 = getelementptr inbounds i32, i32* %p0, i64 3
-  %i0 = load i32, i32* %p0, align 1
-  %i1 = load i32, i32* %p1, align 1
-  %i2 = load i32, i32* %p2, align 1
-  %i3 = load i32, i32* %p3, align 1
+  %p1 = getelementptr inbounds i32, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i32, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i32, ptr %p0, i64 3
+  %i0 = load i32, ptr %p0, align 1
+  %i1 = load i32, ptr %p1, align 1
+  %i2 = load i32, ptr %p2, align 1
+  %i3 = load i32, ptr %p3, align 1
   %x0 = sext i32 %i0 to i64
   %x1 = sext i32 %i1 to i64
   %x2 = sext i32 %i2 to i64

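(Editorial note on the pattern above: every hunk in this file follows the same mechanical rewrite. Under typed pointers the SLP vectorizer had to emit a bitcast to a vector pointer type before the wide load, so the FileCheck lines carried an extra TMP1; with opaque pointers the wide load takes the incoming ptr directly and the bitcast disappears. A minimal before/after sketch, hand-written here for illustration rather than copied from any single test:

  ; typed pointers (old)
  %tmp1 = bitcast i16* %p0 to <2 x i16>*
  %tmp2 = load <2 x i16>, <2 x i16>* %tmp1, align 1

  ; opaque pointers (new)
  %tmp2 = load <2 x i16>, ptr %p0, align 1

Constant-expression GEPs over globals keep their array-typed first operand and only swap the base to ptr @global, and a GEP at index 0 folds away entirely, which is why lines like "load i64, ptr @a64" replace the index-0 getelementptr forms in the next file.)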
diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/shift-ashr.ll b/llvm/test/Transforms/SLPVectorizer/X86/shift-ashr.ll
index 19fa75ad5c8e9..51798deae694a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shift-ashr.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shift-ashr.ll
@@ -22,22 +22,22 @@
 
 define void @ashr_v8i64() {
 ; SSE-LABEL: @ashr_v8i64(
-; SSE-NEXT:    [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-; SSE-NEXT:    [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-; SSE-NEXT:    [[A2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-; SSE-NEXT:    [[A3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-; SSE-NEXT:    [[A4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-; SSE-NEXT:    [[A5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-; SSE-NEXT:    [[A6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-; SSE-NEXT:    [[A7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-; SSE-NEXT:    [[B0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-; SSE-NEXT:    [[B1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-; SSE-NEXT:    [[B2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-; SSE-NEXT:    [[B3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-; SSE-NEXT:    [[B4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-; SSE-NEXT:    [[B5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-; SSE-NEXT:    [[B6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-; SSE-NEXT:    [[B7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+; SSE-NEXT:    [[A0:%.*]] = load i64, ptr @a64, align 8
+; SSE-NEXT:    [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+; SSE-NEXT:    [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+; SSE-NEXT:    [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+; SSE-NEXT:    [[B0:%.*]] = load i64, ptr @b64, align 8
+; SSE-NEXT:    [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+; SSE-NEXT:    [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+; SSE-NEXT:    [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
 ; SSE-NEXT:    [[R0:%.*]] = ashr i64 [[A0]], [[B0]]
 ; SSE-NEXT:    [[R1:%.*]] = ashr i64 [[A1]], [[B1]]
 ; SSE-NEXT:    [[R2:%.*]] = ashr i64 [[A2]], [[B2]]
@@ -46,33 +46,33 @@ define void @ashr_v8i64() {
 ; SSE-NEXT:    [[R5:%.*]] = ashr i64 [[A5]], [[B5]]
 ; SSE-NEXT:    [[R6:%.*]] = ashr i64 [[A6]], [[B6]]
 ; SSE-NEXT:    [[R7:%.*]] = ashr i64 [[A7]], [[B7]]
-; SSE-NEXT:    store i64 [[R0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-; SSE-NEXT:    store i64 [[R1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-; SSE-NEXT:    store i64 [[R2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-; SSE-NEXT:    store i64 [[R3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-; SSE-NEXT:    store i64 [[R4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-; SSE-NEXT:    store i64 [[R5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-; SSE-NEXT:    store i64 [[R6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-; SSE-NEXT:    store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+; SSE-NEXT:    store i64 [[R0]], ptr @c64, align 8
+; SSE-NEXT:    store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+; SSE-NEXT:    store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SSE-NEXT:    store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+; SSE-NEXT:    store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SSE-NEXT:    store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+; SSE-NEXT:    store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+; SSE-NEXT:    store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX1-LABEL: @ashr_v8i64(
-; AVX1-NEXT:    [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-; AVX1-NEXT:    [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-; AVX1-NEXT:    [[A2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-; AVX1-NEXT:    [[A3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-; AVX1-NEXT:    [[A4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-; AVX1-NEXT:    [[A5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-; AVX1-NEXT:    [[A6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-; AVX1-NEXT:    [[A7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-; AVX1-NEXT:    [[B0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-; AVX1-NEXT:    [[B1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-; AVX1-NEXT:    [[B2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-; AVX1-NEXT:    [[B3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-; AVX1-NEXT:    [[B4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-; AVX1-NEXT:    [[B5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-; AVX1-NEXT:    [[B6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-; AVX1-NEXT:    [[B7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+; AVX1-NEXT:    [[A0:%.*]] = load i64, ptr @a64, align 8
+; AVX1-NEXT:    [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+; AVX1-NEXT:    [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; AVX1-NEXT:    [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+; AVX1-NEXT:    [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX1-NEXT:    [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+; AVX1-NEXT:    [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; AVX1-NEXT:    [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+; AVX1-NEXT:    [[B0:%.*]] = load i64, ptr @b64, align 8
+; AVX1-NEXT:    [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+; AVX1-NEXT:    [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+; AVX1-NEXT:    [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+; AVX1-NEXT:    [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+; AVX1-NEXT:    [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+; AVX1-NEXT:    [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+; AVX1-NEXT:    [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
 ; AVX1-NEXT:    [[R0:%.*]] = ashr i64 [[A0]], [[B0]]
 ; AVX1-NEXT:    [[R1:%.*]] = ashr i64 [[A1]], [[B1]]
 ; AVX1-NEXT:    [[R2:%.*]] = ashr i64 [[A2]], [[B2]]
@@ -81,61 +81,61 @@ define void @ashr_v8i64() {
 ; AVX1-NEXT:    [[R5:%.*]] = ashr i64 [[A5]], [[B5]]
 ; AVX1-NEXT:    [[R6:%.*]] = ashr i64 [[A6]], [[B6]]
 ; AVX1-NEXT:    [[R7:%.*]] = ashr i64 [[A7]], [[B7]]
-; AVX1-NEXT:    store i64 [[R0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-; AVX1-NEXT:    store i64 [[R1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-; AVX1-NEXT:    store i64 [[R2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-; AVX1-NEXT:    store i64 [[R3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-; AVX1-NEXT:    store i64 [[R4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-; AVX1-NEXT:    store i64 [[R5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-; AVX1-NEXT:    store i64 [[R6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-; AVX1-NEXT:    store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+; AVX1-NEXT:    store i64 [[R0]], ptr @c64, align 8
+; AVX1-NEXT:    store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+; AVX1-NEXT:    store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; AVX1-NEXT:    store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+; AVX1-NEXT:    store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; AVX1-NEXT:    store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+; AVX1-NEXT:    store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+; AVX1-NEXT:    store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @ashr_v8i64(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
-; AVX2-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; AVX2-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
 ; AVX2-NEXT:    [[TMP3:%.*]] = ashr <4 x i64> [[TMP1]], [[TMP2]]
-; AVX2-NEXT:    store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
-; AVX2-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
-; AVX2-NEXT:    [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX2-NEXT:    store <4 x i64> [[TMP3]], ptr @c64, align 8
+; AVX2-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX2-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX2-NEXT:    [[TMP6:%.*]] = ashr <4 x i64> [[TMP4]], [[TMP5]]
-; AVX2-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX2-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @ashr_v8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
+; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
 ; AVX512-NEXT:    [[TMP3:%.*]] = ashr <8 x i64> [[TMP1]], [[TMP2]]
-; AVX512-NEXT:    store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
+; AVX512-NEXT:    store <8 x i64> [[TMP3]], ptr @c64, align 8
 ; AVX512-NEXT:    ret void
 ;
 ; XOP-LABEL: @ashr_v8i64(
-; XOP-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
-; XOP-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; XOP-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; XOP-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
 ; XOP-NEXT:    [[TMP3:%.*]] = ashr <4 x i64> [[TMP1]], [[TMP2]]
-; XOP-NEXT:    store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
-; XOP-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
-; XOP-NEXT:    [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; XOP-NEXT:    store <4 x i64> [[TMP3]], ptr @c64, align 8
+; XOP-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; XOP-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; XOP-NEXT:    [[TMP6:%.*]] = ashr <4 x i64> [[TMP4]], [[TMP5]]
-; XOP-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; XOP-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
 ; XOP-NEXT:    ret void
 ;
-  %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-  %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-  %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-  %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-  %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-  %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-  %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-  %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-  %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-  %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-  %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-  %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-  %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-  %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-  %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-  %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+  %a0 = load i64, ptr @a64, align 8
+  %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+  %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+  %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+  %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+  %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+  %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+  %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+  %b0 = load i64, ptr @b64, align 8
+  %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+  %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+  %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+  %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+  %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+  %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+  %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
   %r0 = ashr i64 %a0, %b0
   %r1 = ashr i64 %a1, %b1
   %r2 = ashr i64 %a2, %b2
@@ -144,98 +144,98 @@ define void @ashr_v8i64() {
   %r5 = ashr i64 %a5, %b5
   %r6 = ashr i64 %a6, %b6
   %r7 = ashr i64 %a7, %b7
-  store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-  store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-  store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-  store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-  store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-  store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-  store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-  store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+  store i64 %r0, ptr @c64, align 8
+  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @ashr_v16i32() {
 ; SSE-LABEL: @ashr_v16i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]]
-; SSE-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP3]], ptr @c32, align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = ashr <4 x i32> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP9:%.*]] = ashr <4 x i32> [[TMP7]], [[TMP8]]
-; SSE-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP12:%.*]] = ashr <4 x i32> [[TMP10]], [[TMP11]]
-; SSE-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @ashr_v16i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
 ; AVX-NEXT:    [[TMP3:%.*]] = ashr <8 x i32> [[TMP1]], [[TMP2]]
-; AVX-NEXT:    store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP3]], ptr @c32, align 4
+; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; AVX-NEXT:    [[TMP6:%.*]] = ashr <8 x i32> [[TMP4]], [[TMP5]]
-; AVX-NEXT:    store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @ashr_v16i32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @b32 to <16 x i32>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
 ; AVX512-NEXT:    [[TMP3:%.*]] = ashr <16 x i32> [[TMP1]], [[TMP2]]
-; AVX512-NEXT:    store <16 x i32> [[TMP3]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4
+; AVX512-NEXT:    store <16 x i32> [[TMP3]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
 ; XOP-LABEL: @ashr_v16i32(
-; XOP-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
-; XOP-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
+; XOP-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
+; XOP-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
 ; XOP-NEXT:    [[TMP3:%.*]] = ashr <8 x i32> [[TMP1]], [[TMP2]]
-; XOP-NEXT:    store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; XOP-NEXT:    [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
-; XOP-NEXT:    [[TMP5:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
+; XOP-NEXT:    store <8 x i32> [[TMP3]], ptr @c32, align 4
+; XOP-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; XOP-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; XOP-NEXT:    [[TMP6:%.*]] = ashr <8 x i32> [[TMP4]], [[TMP5]]
-; XOP-NEXT:    store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; XOP-NEXT:    store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; XOP-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-  %b0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
-  %b1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
-  %b2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
-  %b3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
-  %b4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
-  %b5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
-  %b6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
-  %b7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
-  %b8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
-  %b9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
-  %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-  %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-  %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-  %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-  %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-  %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+  %b0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+  %b1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+  %b2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+  %b3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+  %b4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+  %b5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+  %b6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+  %b7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+  %b8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+  %b9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+  %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+  %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+  %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+  %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+  %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+  %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
   %r0  = ashr i32 %a0 , %b0
   %r1  = ashr i32 %a1 , %b1
   %r2  = ashr i32 %a2 , %b2
@@ -252,138 +252,138 @@ define void @ashr_v16i32() {
   %r13 = ashr i32 %a13, %b13
   %r14 = ashr i32 %a14, %b14
   %r15 = ashr i32 %a15, %b15
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @ashr_v32i16() {
 ; SSE-LABEL: @ashr_v32i16(
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
 ; SSE-NEXT:    [[TMP3:%.*]] = ashr <8 x i16> [[TMP1]], [[TMP2]]
-; SSE-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
 ; SSE-NEXT:    [[TMP6:%.*]] = ashr <8 x i16> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; SSE-NEXT:    [[TMP9:%.*]] = ashr <8 x i16> [[TMP7]], [[TMP8]]
-; SSE-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
 ; SSE-NEXT:    [[TMP12:%.*]] = ashr <8 x i16> [[TMP10]], [[TMP11]]
-; SSE-NEXT:    store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @ashr_v32i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
+; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
 ; AVX-NEXT:    [[TMP3:%.*]] = ashr <16 x i16> [[TMP1]], [[TMP2]]
-; AVX-NEXT:    store <16 x i16> [[TMP3]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP3]], ptr @c16, align 2
+; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; AVX-NEXT:    [[TMP6:%.*]] = ashr <16 x i16> [[TMP4]], [[TMP5]]
-; AVX-NEXT:    store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @ashr_v32i16(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @a16 to <32 x i16>*), align 2
-; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @b16 to <32 x i16>*), align 2
+; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
+; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
 ; AVX512-NEXT:    [[TMP3:%.*]] = ashr <32 x i16> [[TMP1]], [[TMP2]]
-; AVX512-NEXT:    store <32 x i16> [[TMP3]], <32 x i16>* bitcast ([32 x i16]* @c16 to <32 x i16>*), align 2
+; AVX512-NEXT:    store <32 x i16> [[TMP3]], ptr @c16, align 2
 ; AVX512-NEXT:    ret void
 ;
 ; XOP-LABEL: @ashr_v32i16(
-; XOP-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
-; XOP-NEXT:    [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
+; XOP-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
+; XOP-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
 ; XOP-NEXT:    [[TMP3:%.*]] = ashr <16 x i16> [[TMP1]], [[TMP2]]
-; XOP-NEXT:    store <16 x i16> [[TMP3]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
-; XOP-NEXT:    [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
-; XOP-NEXT:    [[TMP5:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
+; XOP-NEXT:    store <16 x i16> [[TMP3]], ptr @c16, align 2
+; XOP-NEXT:    [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; XOP-NEXT:    [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; XOP-NEXT:    [[TMP6:%.*]] = ashr <16 x i16> [[TMP4]], [[TMP5]]
-; XOP-NEXT:    store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
+; XOP-NEXT:    store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
 ; XOP-NEXT:    ret void
 ;
-  %a0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
-  %a1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
-  %a2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
-  %a3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
-  %a4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
-  %a5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
-  %a6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
-  %a7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
-  %a8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
-  %a9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
-  %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-  %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-  %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-  %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-  %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-  %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-  %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-  %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-  %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-  %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-  %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-  %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-  %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-  %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-  %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-  %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-  %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-  %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-  %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-  %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-  %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-  %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-  %b0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
-  %b1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
-  %b2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
-  %b3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
-  %b4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
-  %b5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
-  %b6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
-  %b7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
-  %b8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
-  %b9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
-  %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-  %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-  %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-  %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-  %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-  %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-  %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-  %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-  %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-  %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-  %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-  %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-  %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-  %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-  %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-  %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-  %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-  %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-  %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-  %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-  %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-  %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+  %a0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+  %a1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+  %a2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+  %a3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+  %a4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+  %a5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+  %a6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+  %a7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+  %a8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+  %a9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+  %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+  %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+  %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+  %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+  %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+  %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+  %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+  %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+  %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+  %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+  %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+  %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+  %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+  %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+  %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+  %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+  %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+  %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+  %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+  %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+  %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+  %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+  %b0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+  %b1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+  %b2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+  %b3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+  %b4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+  %b5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+  %b6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+  %b7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+  %b8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+  %b9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+  %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+  %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+  %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+  %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+  %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+  %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+  %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+  %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+  %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+  %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+  %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+  %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+  %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+  %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+  %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+  %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+  %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+  %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+  %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+  %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+  %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+  %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
   %r0  = ashr i16 %a0 , %b0
   %r1  = ashr i16 %a1 , %b1
   %r2  = ashr i16 %a2 , %b2
@@ -416,218 +416,218 @@ define void @ashr_v32i16() {
   %r29 = ashr i16 %a29, %b29
   %r30 = ashr i16 %a30, %b30
   %r31 = ashr i16 %a31, %b31
-  store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
-  store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
-  store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
-  store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
-  store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
-  store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
-  store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
-  store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
-  store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
-  store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
-  store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-  store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-  store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-  store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-  store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-  store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-  store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-  store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-  store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-  store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-  store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-  store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-  store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-  store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-  store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-  store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-  store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-  store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-  store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-  store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-  store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-  store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+  store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+  store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+  store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+  store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+  store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+  store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+  store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+  store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+  store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+  store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+  store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+  store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+  store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+  store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+  store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+  store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+  store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+  store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+  store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+  store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+  store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+  store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+  store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+  store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+  store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+  store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+  store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+  store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+  store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+  store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+  store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+  store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
   ret void
 }
 
 define void @ashr_v64i8() {
 ; SSE-LABEL: @ashr_v64i8(
-; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = ashr <16 x i8> [[TMP1]], [[TMP2]]
-; SSE-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
 ; SSE-NEXT:    [[TMP6:%.*]] = ashr <16 x i8> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; SSE-NEXT:    [[TMP9:%.*]] = ashr <16 x i8> [[TMP7]], [[TMP8]]
-; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
 ; SSE-NEXT:    [[TMP12:%.*]] = ashr <16 x i8> [[TMP10]], [[TMP11]]
-; SSE-NEXT:    store <16 x i8> [[TMP9]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @ashr_v64i8(
-; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @a8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @b8 to <32 x i8>*), align 1
+; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = ashr <32 x i8> [[TMP1]], [[TMP2]]
-; AVX-NEXT:    store <32 x i8> [[TMP3]], <32 x i8>* bitcast ([64 x i8]* @c8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP3]], ptr @c8, align 1
+; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; AVX-NEXT:    [[TMP6:%.*]] = ashr <32 x i8> [[TMP4]], [[TMP5]]
-; AVX-NEXT:    store <32 x i8> [[TMP6]], <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @ashr_v64i8(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @a8 to <64 x i8>*), align 1
-; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @b8 to <64 x i8>*), align 1
+; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
+; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
 ; AVX512-NEXT:    [[TMP3:%.*]] = ashr <64 x i8> [[TMP1]], [[TMP2]]
-; AVX512-NEXT:    store <64 x i8> [[TMP3]], <64 x i8>* bitcast ([64 x i8]* @c8 to <64 x i8>*), align 1
+; AVX512-NEXT:    store <64 x i8> [[TMP3]], ptr @c8, align 1
 ; AVX512-NEXT:    ret void
 ;
 ; XOP-LABEL: @ashr_v64i8(
-; XOP-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @a8 to <32 x i8>*), align 1
-; XOP-NEXT:    [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @b8 to <32 x i8>*), align 1
+; XOP-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
+; XOP-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
 ; XOP-NEXT:    [[TMP3:%.*]] = ashr <32 x i8> [[TMP1]], [[TMP2]]
-; XOP-NEXT:    store <32 x i8> [[TMP3]], <32 x i8>* bitcast ([64 x i8]* @c8 to <32 x i8>*), align 1
-; XOP-NEXT:    [[TMP4:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <32 x i8>*), align 1
-; XOP-NEXT:    [[TMP5:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <32 x i8>*), align 1
+; XOP-NEXT:    store <32 x i8> [[TMP3]], ptr @c8, align 1
+; XOP-NEXT:    [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; XOP-NEXT:    [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; XOP-NEXT:    [[TMP6:%.*]] = ashr <32 x i8> [[TMP4]], [[TMP5]]
-; XOP-NEXT:    store <32 x i8> [[TMP6]], <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <32 x i8>*), align 1
+; XOP-NEXT:    store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
 ; XOP-NEXT:    ret void
 ;
-  %a0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
-  %a1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
-  %a2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
-  %a3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
-  %a4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
-  %a5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
-  %a6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
-  %a7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
-  %a8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
-  %a9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
-  %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-  %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-  %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-  %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-  %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-  %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-  %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-  %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-  %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-  %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-  %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-  %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-  %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-  %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-  %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-  %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-  %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-  %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-  %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-  %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-  %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-  %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-  %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-  %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-  %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-  %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-  %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-  %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-  %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-  %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-  %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-  %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-  %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-  %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-  %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-  %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-  %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-  %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-  %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-  %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-  %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-  %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-  %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-  %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-  %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-  %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-  %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-  %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-  %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-  %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-  %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-  %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-  %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-  %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-  %b0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
-  %b1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
-  %b2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
-  %b3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
-  %b4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
-  %b5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
-  %b6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
-  %b7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
-  %b8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
-  %b9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
-  %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-  %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-  %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-  %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-  %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-  %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-  %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-  %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-  %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-  %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-  %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-  %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-  %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-  %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-  %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-  %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-  %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-  %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-  %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-  %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-  %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-  %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-  %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-  %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-  %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-  %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-  %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-  %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-  %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-  %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-  %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-  %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-  %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-  %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-  %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-  %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-  %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-  %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-  %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-  %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-  %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-  %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-  %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-  %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-  %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-  %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-  %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-  %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-  %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-  %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-  %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-  %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-  %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-  %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+  %a0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+  %a1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+  %a2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+  %a3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+  %a4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+  %a5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+  %a6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+  %a7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+  %a8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+  %a9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+  %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+  %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+  %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+  %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+  %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+  %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+  %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+  %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+  %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+  %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+  %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+  %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+  %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+  %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+  %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+  %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+  %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+  %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+  %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+  %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+  %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+  %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+  %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+  %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+  %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+  %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+  %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+  %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+  %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+  %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+  %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+  %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+  %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+  %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+  %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+  %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+  %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+  %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+  %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+  %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+  %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+  %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+  %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+  %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+  %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+  %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+  %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+  %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+  %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+  %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+  %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+  %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+  %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+  %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+  %b0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+  %b1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+  %b2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+  %b3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+  %b4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+  %b5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+  %b6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+  %b7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+  %b8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+  %b9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+  %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+  %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+  %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+  %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+  %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+  %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+  %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+  %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+  %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+  %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+  %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+  %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+  %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+  %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+  %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+  %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+  %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+  %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+  %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+  %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+  %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+  %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+  %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+  %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+  %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+  %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+  %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+  %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+  %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+  %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+  %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+  %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+  %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+  %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+  %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+  %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+  %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+  %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+  %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+  %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+  %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+  %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+  %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+  %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+  %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+  %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+  %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+  %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+  %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+  %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+  %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+  %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+  %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+  %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
   %r0  = ashr i8 %a0 , %b0
   %r1  = ashr i8 %a1 , %b1
   %r2  = ashr i8 %a2 , %b2
@@ -692,69 +692,69 @@ define void @ashr_v64i8() {
   %r61 = ashr i8 %a61, %b61
   %r62 = ashr i8 %a62, %b62
   %r63 = ashr i8 %a63, %b63
-  store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
-  store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
-  store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
-  store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
-  store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
-  store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
-  store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
-  store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
-  store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
-  store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
-  store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-  store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-  store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-  store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-  store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-  store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-  store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-  store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-  store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-  store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-  store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-  store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-  store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-  store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-  store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-  store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-  store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-  store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-  store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-  store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-  store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-  store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-  store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-  store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-  store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-  store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-  store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-  store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-  store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-  store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-  store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-  store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-  store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-  store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-  store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-  store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-  store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-  store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-  store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-  store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-  store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-  store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-  store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-  store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-  store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-  store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-  store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-  store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-  store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-  store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-  store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-  store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-  store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-  store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+  store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+  store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+  store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+  store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+  store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+  store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+  store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+  store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+  store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+  store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+  store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+  store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+  store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+  store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+  store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+  store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+  store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+  store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+  store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+  store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+  store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+  store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+  store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+  store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+  store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+  store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+  store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+  store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+  store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+  store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+  store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+  store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+  store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+  store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+  store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+  store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+  store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+  store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+  store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+  store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+  store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+  store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+  store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+  store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+  store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+  store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+  store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+  store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+  store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+  store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+  store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+  store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+  store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+  store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+  store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+  store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+  store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+  store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+  store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+  store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+  store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+  store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+  store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+  store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shift-lshr.ll b/llvm/test/Transforms/SLPVectorizer/X86/shift-lshr.ll
index 316993bd733ee..7583561bbecf9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shift-lshr.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shift-lshr.ll
@@ -22,69 +22,69 @@
 
 define void @lshr_v8i64() {
 ; SSE-LABEL: @lshr_v8i64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
+; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
 ; SSE-NEXT:    [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], [[TMP2]]
-; SSE-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP3]], ptr @c64, align 8
+; SSE-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP6:%.*]] = lshr <2 x i64> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; SSE-NEXT:    [[TMP9:%.*]] = lshr <2 x i64> [[TMP7]], [[TMP8]]
-; SSE-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP11:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
 ; SSE-NEXT:    [[TMP12:%.*]] = lshr <2 x i64> [[TMP10]], [[TMP11]]
-; SSE-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @lshr_v8i64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
 ; AVX-NEXT:    [[TMP3:%.*]] = lshr <4 x i64> [[TMP1]], [[TMP2]]
-; AVX-NEXT:    store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP3]], ptr @c64, align 8
+; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX-NEXT:    [[TMP6:%.*]] = lshr <4 x i64> [[TMP4]], [[TMP5]]
-; AVX-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @lshr_v8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
+; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
 ; AVX512-NEXT:    [[TMP3:%.*]] = lshr <8 x i64> [[TMP1]], [[TMP2]]
-; AVX512-NEXT:    store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
+; AVX512-NEXT:    store <8 x i64> [[TMP3]], ptr @c64, align 8
 ; AVX512-NEXT:    ret void
 ;
 ; XOP-LABEL: @lshr_v8i64(
-; XOP-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
-; XOP-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; XOP-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; XOP-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
 ; XOP-NEXT:    [[TMP3:%.*]] = lshr <4 x i64> [[TMP1]], [[TMP2]]
-; XOP-NEXT:    store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
-; XOP-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
-; XOP-NEXT:    [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; XOP-NEXT:    store <4 x i64> [[TMP3]], ptr @c64, align 8
+; XOP-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; XOP-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; XOP-NEXT:    [[TMP6:%.*]] = lshr <4 x i64> [[TMP4]], [[TMP5]]
-; XOP-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; XOP-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
 ; XOP-NEXT:    ret void
 ;
-  %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-  %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-  %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-  %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-  %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-  %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-  %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-  %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-  %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-  %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-  %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-  %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-  %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-  %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-  %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-  %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+  %a0 = load i64, ptr @a64, align 8
+  %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+  %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+  %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+  %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+  %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+  %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+  %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+  %b0 = load i64, ptr @b64, align 8
+  %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+  %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+  %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+  %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+  %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+  %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+  %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
   %r0 = lshr i64 %a0, %b0
   %r1 = lshr i64 %a1, %b1
   %r2 = lshr i64 %a2, %b2
@@ -93,98 +93,98 @@ define void @lshr_v8i64() {
   %r5 = lshr i64 %a5, %b5
   %r6 = lshr i64 %a6, %b6
   %r7 = lshr i64 %a7, %b7
-  store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-  store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-  store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-  store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-  store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-  store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-  store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-  store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+  store i64 %r0, ptr @c64, align 8
+  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @lshr_v16i32() {
 ; SSE-LABEL: @lshr_v16i32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]]
-; SSE-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP3]], ptr @c32, align 4
+; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = lshr <4 x i32> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP9:%.*]] = lshr <4 x i32> [[TMP7]], [[TMP8]]
-; SSE-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
-; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP12:%.*]] = lshr <4 x i32> [[TMP10]], [[TMP11]]
-; SSE-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
+; SSE-NEXT:    store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @lshr_v16i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
 ; AVX-NEXT:    [[TMP3:%.*]] = lshr <8 x i32> [[TMP1]], [[TMP2]]
-; AVX-NEXT:    store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP3]], ptr @c32, align 4
+; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; AVX-NEXT:    [[TMP6:%.*]] = lshr <8 x i32> [[TMP4]], [[TMP5]]
-; AVX-NEXT:    store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @lshr_v16i32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @b32 to <16 x i32>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
 ; AVX512-NEXT:    [[TMP3:%.*]] = lshr <16 x i32> [[TMP1]], [[TMP2]]
-; AVX512-NEXT:    store <16 x i32> [[TMP3]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4
+; AVX512-NEXT:    store <16 x i32> [[TMP3]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
 ; XOP-LABEL: @lshr_v16i32(
-; XOP-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
-; XOP-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
+; XOP-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
+; XOP-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
 ; XOP-NEXT:    [[TMP3:%.*]] = lshr <8 x i32> [[TMP1]], [[TMP2]]
-; XOP-NEXT:    store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; XOP-NEXT:    [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
-; XOP-NEXT:    [[TMP5:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
+; XOP-NEXT:    store <8 x i32> [[TMP3]], ptr @c32, align 4
+; XOP-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; XOP-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; XOP-NEXT:    [[TMP6:%.*]] = lshr <8 x i32> [[TMP4]], [[TMP5]]
-; XOP-NEXT:    store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; XOP-NEXT:    store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; XOP-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-  %b0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
-  %b1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
-  %b2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
-  %b3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
-  %b4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
-  %b5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
-  %b6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
-  %b7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
-  %b8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
-  %b9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
-  %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-  %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-  %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-  %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-  %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-  %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+  %b0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+  %b1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+  %b2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+  %b3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+  %b4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+  %b5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+  %b6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+  %b7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+  %b8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+  %b9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+  %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+  %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+  %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+  %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+  %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+  %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
   %r0  = lshr i32 %a0 , %b0
   %r1  = lshr i32 %a1 , %b1
   %r2  = lshr i32 %a2 , %b2
@@ -201,138 +201,138 @@ define void @lshr_v16i32() {
   %r13 = lshr i32 %a13, %b13
   %r14 = lshr i32 %a14, %b14
   %r15 = lshr i32 %a15, %b15
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @lshr_v32i16() {
 ; SSE-LABEL: @lshr_v32i16(
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
 ; SSE-NEXT:    [[TMP3:%.*]] = lshr <8 x i16> [[TMP1]], [[TMP2]]
-; SSE-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
 ; SSE-NEXT:    [[TMP6:%.*]] = lshr <8 x i16> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; SSE-NEXT:    [[TMP9:%.*]] = lshr <8 x i16> [[TMP7]], [[TMP8]]
-; SSE-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
 ; SSE-NEXT:    [[TMP12:%.*]] = lshr <8 x i16> [[TMP10]], [[TMP11]]
-; SSE-NEXT:    store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @lshr_v32i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
+; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
 ; AVX-NEXT:    [[TMP3:%.*]] = lshr <16 x i16> [[TMP1]], [[TMP2]]
-; AVX-NEXT:    store <16 x i16> [[TMP3]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP3]], ptr @c16, align 2
+; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; AVX-NEXT:    [[TMP6:%.*]] = lshr <16 x i16> [[TMP4]], [[TMP5]]
-; AVX-NEXT:    store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @lshr_v32i16(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @a16 to <32 x i16>*), align 2
-; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @b16 to <32 x i16>*), align 2
+; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
+; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
 ; AVX512-NEXT:    [[TMP3:%.*]] = lshr <32 x i16> [[TMP1]], [[TMP2]]
-; AVX512-NEXT:    store <32 x i16> [[TMP3]], <32 x i16>* bitcast ([32 x i16]* @c16 to <32 x i16>*), align 2
+; AVX512-NEXT:    store <32 x i16> [[TMP3]], ptr @c16, align 2
 ; AVX512-NEXT:    ret void
 ;
 ; XOP-LABEL: @lshr_v32i16(
-; XOP-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
-; XOP-NEXT:    [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
+; XOP-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
+; XOP-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
 ; XOP-NEXT:    [[TMP3:%.*]] = lshr <16 x i16> [[TMP1]], [[TMP2]]
-; XOP-NEXT:    store <16 x i16> [[TMP3]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
-; XOP-NEXT:    [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
-; XOP-NEXT:    [[TMP5:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
+; XOP-NEXT:    store <16 x i16> [[TMP3]], ptr @c16, align 2
+; XOP-NEXT:    [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; XOP-NEXT:    [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; XOP-NEXT:    [[TMP6:%.*]] = lshr <16 x i16> [[TMP4]], [[TMP5]]
-; XOP-NEXT:    store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
+; XOP-NEXT:    store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
 ; XOP-NEXT:    ret void
 ;
-  %a0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
-  %a1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
-  %a2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
-  %a3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
-  %a4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
-  %a5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
-  %a6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
-  %a7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
-  %a8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
-  %a9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
-  %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-  %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-  %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-  %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-  %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-  %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-  %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-  %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-  %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-  %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-  %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-  %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-  %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-  %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-  %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-  %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-  %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-  %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-  %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-  %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-  %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-  %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-  %b0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
-  %b1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
-  %b2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
-  %b3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
-  %b4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
-  %b5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
-  %b6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
-  %b7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
-  %b8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
-  %b9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
-  %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-  %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-  %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-  %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-  %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-  %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-  %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-  %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-  %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-  %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-  %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-  %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-  %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-  %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-  %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-  %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-  %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-  %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-  %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-  %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-  %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-  %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+  %a0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+  %a1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+  %a2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+  %a3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+  %a4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+  %a5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+  %a6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+  %a7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+  %a8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+  %a9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+  %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+  %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+  %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+  %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+  %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+  %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+  %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+  %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+  %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+  %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+  %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+  %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+  %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+  %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+  %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+  %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+  %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+  %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+  %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+  %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+  %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+  %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+  %b0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+  %b1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+  %b2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+  %b3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+  %b4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+  %b5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+  %b6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+  %b7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+  %b8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+  %b9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+  %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+  %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+  %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+  %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+  %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+  %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+  %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+  %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+  %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+  %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+  %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+  %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+  %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+  %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+  %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+  %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+  %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+  %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+  %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+  %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+  %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+  %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
   %r0  = lshr i16 %a0 , %b0
   %r1  = lshr i16 %a1 , %b1
   %r2  = lshr i16 %a2 , %b2
@@ -365,218 +365,218 @@ define void @lshr_v32i16() {
   %r29 = lshr i16 %a29, %b29
   %r30 = lshr i16 %a30, %b30
   %r31 = lshr i16 %a31, %b31
-  store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
-  store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
-  store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
-  store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
-  store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
-  store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
-  store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
-  store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
-  store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
-  store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
-  store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-  store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-  store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-  store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-  store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-  store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-  store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-  store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-  store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-  store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-  store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-  store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-  store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-  store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-  store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-  store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-  store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-  store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-  store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-  store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-  store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-  store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+  store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+  store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+  store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+  store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+  store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+  store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+  store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+  store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+  store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+  store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+  store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+  store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+  store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+  store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+  store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+  store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+  store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+  store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+  store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+  store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+  store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+  store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+  store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+  store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+  store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+  store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+  store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+  store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+  store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+  store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+  store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+  store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
   ret void
 }
 
 define void @lshr_v64i8() {
 ; SSE-LABEL: @lshr_v64i8(
-; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = lshr <16 x i8> [[TMP1]], [[TMP2]]
-; SSE-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
 ; SSE-NEXT:    [[TMP6:%.*]] = lshr <16 x i8> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; SSE-NEXT:    [[TMP9:%.*]] = lshr <16 x i8> [[TMP7]], [[TMP8]]
-; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
 ; SSE-NEXT:    [[TMP12:%.*]] = lshr <16 x i8> [[TMP10]], [[TMP11]]
-; SSE-NEXT:    store <16 x i8> [[TMP9]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @lshr_v64i8(
-; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @a8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @b8 to <32 x i8>*), align 1
+; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = lshr <32 x i8> [[TMP1]], [[TMP2]]
-; AVX-NEXT:    store <32 x i8> [[TMP3]], <32 x i8>* bitcast ([64 x i8]* @c8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP3]], ptr @c8, align 1
+; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; AVX-NEXT:    [[TMP6:%.*]] = lshr <32 x i8> [[TMP4]], [[TMP5]]
-; AVX-NEXT:    store <32 x i8> [[TMP6]], <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @lshr_v64i8(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @a8 to <64 x i8>*), align 1
-; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @b8 to <64 x i8>*), align 1
+; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
+; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
 ; AVX512-NEXT:    [[TMP3:%.*]] = lshr <64 x i8> [[TMP1]], [[TMP2]]
-; AVX512-NEXT:    store <64 x i8> [[TMP3]], <64 x i8>* bitcast ([64 x i8]* @c8 to <64 x i8>*), align 1
+; AVX512-NEXT:    store <64 x i8> [[TMP3]], ptr @c8, align 1
 ; AVX512-NEXT:    ret void
 ;
 ; XOP-LABEL: @lshr_v64i8(
-; XOP-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @a8 to <32 x i8>*), align 1
-; XOP-NEXT:    [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @b8 to <32 x i8>*), align 1
+; XOP-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
+; XOP-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
 ; XOP-NEXT:    [[TMP3:%.*]] = lshr <32 x i8> [[TMP1]], [[TMP2]]
-; XOP-NEXT:    store <32 x i8> [[TMP3]], <32 x i8>* bitcast ([64 x i8]* @c8 to <32 x i8>*), align 1
-; XOP-NEXT:    [[TMP4:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <32 x i8>*), align 1
-; XOP-NEXT:    [[TMP5:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <32 x i8>*), align 1
+; XOP-NEXT:    store <32 x i8> [[TMP3]], ptr @c8, align 1
+; XOP-NEXT:    [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; XOP-NEXT:    [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; XOP-NEXT:    [[TMP6:%.*]] = lshr <32 x i8> [[TMP4]], [[TMP5]]
-; XOP-NEXT:    store <32 x i8> [[TMP6]], <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <32 x i8>*), align 1
+; XOP-NEXT:    store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
 ; XOP-NEXT:    ret void
 ;
-  %a0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
-  %a1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
-  %a2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
-  %a3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
-  %a4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
-  %a5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
-  %a6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
-  %a7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
-  %a8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
-  %a9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
-  %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-  %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-  %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-  %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-  %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-  %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-  %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-  %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-  %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-  %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-  %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-  %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-  %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-  %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-  %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-  %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-  %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-  %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-  %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-  %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-  %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-  %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-  %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-  %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-  %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-  %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-  %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-  %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-  %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-  %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-  %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-  %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-  %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-  %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-  %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-  %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-  %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-  %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-  %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-  %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-  %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-  %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-  %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-  %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-  %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-  %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-  %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-  %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-  %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-  %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-  %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-  %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-  %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-  %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-  %b0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
-  %b1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
-  %b2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
-  %b3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
-  %b4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
-  %b5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
-  %b6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
-  %b7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
-  %b8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
-  %b9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
-  %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-  %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-  %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-  %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-  %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-  %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-  %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-  %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-  %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-  %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-  %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-  %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-  %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-  %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-  %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-  %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-  %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-  %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-  %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-  %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-  %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-  %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-  %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-  %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-  %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-  %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-  %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-  %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-  %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-  %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-  %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-  %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-  %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-  %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-  %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-  %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-  %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-  %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-  %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-  %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-  %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-  %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-  %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-  %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-  %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-  %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-  %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-  %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-  %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-  %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-  %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-  %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-  %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-  %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+  %a0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+  %a1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+  %a2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+  %a3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+  %a4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+  %a5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+  %a6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+  %a7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+  %a8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+  %a9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+  %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+  %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+  %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+  %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+  %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+  %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+  %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+  %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+  %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+  %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+  %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+  %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+  %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+  %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+  %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+  %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+  %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+  %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+  %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+  %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+  %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+  %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+  %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+  %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+  %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+  %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+  %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+  %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+  %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+  %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+  %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+  %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+  %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+  %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+  %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+  %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+  %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+  %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+  %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+  %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+  %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+  %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+  %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+  %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+  %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+  %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+  %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+  %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+  %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+  %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+  %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+  %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+  %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+  %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+  %b0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+  %b1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+  %b2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+  %b3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+  %b4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+  %b5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+  %b6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+  %b7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+  %b8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+  %b9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+  %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+  %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+  %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+  %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+  %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+  %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+  %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+  %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+  %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+  %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+  %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+  %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+  %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+  %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+  %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+  %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+  %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+  %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+  %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+  %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+  %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+  %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+  %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+  %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+  %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+  %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+  %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+  %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+  %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+  %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+  %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+  %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+  %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+  %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+  %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+  %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+  %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+  %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+  %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+  %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+  %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+  %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+  %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+  %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+  %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+  %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+  %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+  %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+  %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+  %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+  %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+  %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+  %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+  %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
   %r0  = lshr i8 %a0 , %b0
   %r1  = lshr i8 %a1 , %b1
   %r2  = lshr i8 %a2 , %b2
@@ -641,69 +641,69 @@ define void @lshr_v64i8() {
   %r61 = lshr i8 %a61, %b61
   %r62 = lshr i8 %a62, %b62
   %r63 = lshr i8 %a63, %b63
-  store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
-  store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
-  store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
-  store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
-  store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
-  store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
-  store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
-  store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
-  store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
-  store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
-  store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-  store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-  store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-  store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-  store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-  store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-  store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-  store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-  store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-  store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-  store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-  store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-  store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-  store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-  store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-  store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-  store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-  store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-  store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-  store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-  store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-  store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-  store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-  store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-  store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-  store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-  store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-  store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-  store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-  store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-  store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-  store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-  store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-  store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-  store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-  store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-  store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-  store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-  store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-  store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-  store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-  store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-  store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-  store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-  store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-  store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-  store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-  store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-  store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-  store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-  store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-  store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-  store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-  store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+  store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+  store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+  store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+  store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+  store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+  store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+  store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+  store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+  store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+  store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+  store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+  store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+  store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+  store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+  store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+  store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+  store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+  store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+  store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+  store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+  store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+  store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+  store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+  store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+  store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+  store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+  store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+  store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+  store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+  store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+  store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+  store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+  store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+  store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+  store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+  store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+  store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+  store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+  store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+  store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+  store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+  store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+  store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+  store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+  store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+  store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+  store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+  store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+  store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+  store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+  store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+  store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+  store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+  store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+  store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+  store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+  store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+  store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+  store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+  store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+  store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+  store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+  store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+  store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
   ret void
 }

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/shift-shl.ll b/llvm/test/Transforms/SLPVectorizer/X86/shift-shl.ll
index f3a897f47b353..5ec327c131fb7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shift-shl.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shift-shl.ll
@@ -22,69 +22,69 @@
 
 define void @shl_v8i64() {
 ; SSE-LABEL: @shl_v8i64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @a64, align 8
+; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @b64, align 8
 ; SSE-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP1]], [[TMP2]]
-; SSE-NEXT:    store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP3]], ptr @c64, align 8
+; SSE-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP6:%.*]] = shl <2 x i64> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[TMP8:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; SSE-NEXT:    [[TMP9:%.*]] = shl <2 x i64> [[TMP7]], [[TMP8]]
-; SSE-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
-; SSE-NEXT:    [[TMP11:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP9]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SSE-NEXT:    [[TMP11:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
 ; SSE-NEXT:    [[TMP12:%.*]] = shl <2 x i64> [[TMP10]], [[TMP11]]
-; SSE-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
+; SSE-NEXT:    store <2 x i64> [[TMP12]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @shl_v8i64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
 ; AVX-NEXT:    [[TMP3:%.*]] = shl <4 x i64> [[TMP1]], [[TMP2]]
-; AVX-NEXT:    store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
-; AVX-NEXT:    [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP3]], ptr @c64, align 8
+; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; AVX-NEXT:    [[TMP6:%.*]] = shl <4 x i64> [[TMP4]], [[TMP5]]
-; AVX-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @shl_v8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
+; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
 ; AVX512-NEXT:    [[TMP3:%.*]] = shl <8 x i64> [[TMP1]], [[TMP2]]
-; AVX512-NEXT:    store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
+; AVX512-NEXT:    store <8 x i64> [[TMP3]], ptr @c64, align 8
 ; AVX512-NEXT:    ret void
 ;
 ; XOP-LABEL: @shl_v8i64(
-; XOP-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
-; XOP-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; XOP-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; XOP-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
 ; XOP-NEXT:    [[TMP3:%.*]] = shl <4 x i64> [[TMP1]], [[TMP2]]
-; XOP-NEXT:    store <4 x i64> [[TMP3]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
-; XOP-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
-; XOP-NEXT:    [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; XOP-NEXT:    store <4 x i64> [[TMP3]], ptr @c64, align 8
+; XOP-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; XOP-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
 ; XOP-NEXT:    [[TMP6:%.*]] = shl <4 x i64> [[TMP4]], [[TMP5]]
-; XOP-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; XOP-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
 ; XOP-NEXT:    ret void
 ;
-  %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
-  %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
-  %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
-  %a3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
-  %a4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
-  %a5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
-  %a6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
-  %a7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
-  %b0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
-  %b1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
-  %b2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
-  %b3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
-  %b4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
-  %b5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
-  %b6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
-  %b7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
+  %a0 = load i64, ptr @a64, align 8
+  %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+  %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+  %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+  %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+  %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+  %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+  %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+  %b0 = load i64, ptr @b64, align 8
+  %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+  %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+  %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+  %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+  %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+  %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+  %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
   %r0 = shl i64 %a0, %b0
   %r1 = shl i64 %a1, %b1
   %r2 = shl i64 %a2, %b2
@@ -93,51 +93,51 @@ define void @shl_v8i64() {
   %r5 = shl i64 %a5, %b5
   %r6 = shl i64 %a6, %b6
   %r7 = shl i64 %a7, %b7
-  store i64 %r0, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
-  store i64 %r1, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
-  store i64 %r2, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
-  store i64 %r3, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
-  store i64 %r4, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
-  store i64 %r5, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
-  store i64 %r6, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
-  store i64 %r7, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
+  store i64 %r0, ptr @c64, align 8
+  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @shl_v16i32() {
 ; SSE-LABEL: @shl_v16i32(
-; SSE-NEXT:    [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4
-; SSE-NEXT:    [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4
-; SSE-NEXT:    [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4
-; SSE-NEXT:    [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4
-; SSE-NEXT:    [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4
-; SSE-NEXT:    [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4
-; SSE-NEXT:    [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4
-; SSE-NEXT:    [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4
-; SSE-NEXT:    [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4
-; SSE-NEXT:    [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4
-; SSE-NEXT:    [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-; SSE-NEXT:    [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-; SSE-NEXT:    [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-; SSE-NEXT:    [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-; SSE-NEXT:    [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-; SSE-NEXT:    [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-; SSE-NEXT:    [[B0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0), align 4
-; SSE-NEXT:    [[B1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1), align 4
-; SSE-NEXT:    [[B2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2), align 4
-; SSE-NEXT:    [[B3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3), align 4
-; SSE-NEXT:    [[B4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4), align 4
-; SSE-NEXT:    [[B5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5), align 4
-; SSE-NEXT:    [[B6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6), align 4
-; SSE-NEXT:    [[B7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7), align 4
-; SSE-NEXT:    [[B8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8), align 4
-; SSE-NEXT:    [[B9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9), align 4
-; SSE-NEXT:    [[B10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-; SSE-NEXT:    [[B11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-; SSE-NEXT:    [[B12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-; SSE-NEXT:    [[B13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-; SSE-NEXT:    [[B14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-; SSE-NEXT:    [[B15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+; SSE-NEXT:    [[A0:%.*]] = load i32, ptr @a32, align 4
+; SSE-NEXT:    [[A1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1), align 4
+; SSE-NEXT:    [[A2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2), align 4
+; SSE-NEXT:    [[A3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3), align 4
+; SSE-NEXT:    [[A4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[A5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5), align 4
+; SSE-NEXT:    [[A6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6), align 4
+; SSE-NEXT:    [[A7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7), align 4
+; SSE-NEXT:    [[A8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[A9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9), align 4
+; SSE-NEXT:    [[A10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+; SSE-NEXT:    [[A11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+; SSE-NEXT:    [[A12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SSE-NEXT:    [[A13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+; SSE-NEXT:    [[A14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+; SSE-NEXT:    [[A15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+; SSE-NEXT:    [[B0:%.*]] = load i32, ptr @b32, align 4
+; SSE-NEXT:    [[B1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1), align 4
+; SSE-NEXT:    [[B2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2), align 4
+; SSE-NEXT:    [[B3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3), align 4
+; SSE-NEXT:    [[B4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[B5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5), align 4
+; SSE-NEXT:    [[B6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6), align 4
+; SSE-NEXT:    [[B7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7), align 4
+; SSE-NEXT:    [[B8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[B9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9), align 4
+; SSE-NEXT:    [[B10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+; SSE-NEXT:    [[B11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+; SSE-NEXT:    [[B12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+; SSE-NEXT:    [[B13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+; SSE-NEXT:    [[B14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+; SSE-NEXT:    [[B15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
 ; SSE-NEXT:    [[R0:%.*]] = shl i32 [[A0]], [[B0]]
 ; SSE-NEXT:    [[R1:%.*]] = shl i32 [[A1]], [[B1]]
 ; SSE-NEXT:    [[R2:%.*]] = shl i32 [[A2]], [[B2]]
@@ -154,85 +154,85 @@ define void @shl_v16i32() {
 ; SSE-NEXT:    [[R13:%.*]] = shl i32 [[A13]], [[B13]]
 ; SSE-NEXT:    [[R14:%.*]] = shl i32 [[A14]], [[B14]]
 ; SSE-NEXT:    [[R15:%.*]] = shl i32 [[A15]], [[B15]]
-; SSE-NEXT:    store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4
-; SSE-NEXT:    store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4
-; SSE-NEXT:    store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4
-; SSE-NEXT:    store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4
-; SSE-NEXT:    store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4
-; SSE-NEXT:    store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4
-; SSE-NEXT:    store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4
-; SSE-NEXT:    store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4
-; SSE-NEXT:    store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4
-; SSE-NEXT:    store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4
-; SSE-NEXT:    store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-; SSE-NEXT:    store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-; SSE-NEXT:    store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-; SSE-NEXT:    store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-; SSE-NEXT:    store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-; SSE-NEXT:    store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+; SSE-NEXT:    store i32 [[R0]], ptr @c32, align 4
+; SSE-NEXT:    store i32 [[R1]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1), align 4
+; SSE-NEXT:    store i32 [[R2]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2), align 4
+; SSE-NEXT:    store i32 [[R3]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3), align 4
+; SSE-NEXT:    store i32 [[R4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SSE-NEXT:    store i32 [[R5]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5), align 4
+; SSE-NEXT:    store i32 [[R6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6), align 4
+; SSE-NEXT:    store i32 [[R7]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7), align 4
+; SSE-NEXT:    store i32 [[R8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SSE-NEXT:    store i32 [[R9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9), align 4
+; SSE-NEXT:    store i32 [[R10]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+; SSE-NEXT:    store i32 [[R11]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+; SSE-NEXT:    store i32 [[R12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+; SSE-NEXT:    store i32 [[R13]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+; SSE-NEXT:    store i32 [[R14]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+; SSE-NEXT:    store i32 [[R15]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @shl_v16i32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
 ; AVX-NEXT:    [[TMP3:%.*]] = shl <8 x i32> [[TMP1]], [[TMP2]]
-; AVX-NEXT:    store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
-; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP3]], ptr @c32, align 4
+; AVX-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; AVX-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; AVX-NEXT:    [[TMP6:%.*]] = shl <8 x i32> [[TMP4]], [[TMP5]]
-; AVX-NEXT:    store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; AVX-NEXT:    store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @shl_v16i32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @a32 to <16 x i32>*), align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @b32 to <16 x i32>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
 ; AVX512-NEXT:    [[TMP3:%.*]] = shl <16 x i32> [[TMP1]], [[TMP2]]
-; AVX512-NEXT:    store <16 x i32> [[TMP3]], <16 x i32>* bitcast ([16 x i32]* @c32 to <16 x i32>*), align 4
+; AVX512-NEXT:    store <16 x i32> [[TMP3]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
 ; XOP-LABEL: @shl_v16i32(
-; XOP-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @a32 to <8 x i32>*), align 4
-; XOP-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @b32 to <8 x i32>*), align 4
+; XOP-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
+; XOP-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
 ; XOP-NEXT:    [[TMP3:%.*]] = shl <8 x i32> [[TMP1]], [[TMP2]]
-; XOP-NEXT:    store <8 x i32> [[TMP3]], <8 x i32>* bitcast ([16 x i32]* @c32 to <8 x i32>*), align 4
-; XOP-NEXT:    [[TMP4:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <8 x i32>*), align 4
-; XOP-NEXT:    [[TMP5:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <8 x i32>*), align 4
+; XOP-NEXT:    store <8 x i32> [[TMP3]], ptr @c32, align 4
+; XOP-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; XOP-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
 ; XOP-NEXT:    [[TMP6:%.*]] = shl <8 x i32> [[TMP4]], [[TMP5]]
-; XOP-NEXT:    store <8 x i32> [[TMP6]], <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <8 x i32>*), align 4
+; XOP-NEXT:    store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; XOP-NEXT:    ret void
 ;
-  %a0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0 ), align 4
-  %a1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1 ), align 4
-  %a2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2 ), align 4
-  %a3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3 ), align 4
-  %a4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4 ), align 4
-  %a5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5 ), align 4
-  %a6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6 ), align 4
-  %a7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7 ), align 4
-  %a8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8 ), align 4
-  %a9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9 ), align 4
-  %a10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
-  %a11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
-  %a12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
-  %a13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
-  %a14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
-  %a15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
-  %b0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0 ), align 4
-  %b1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1 ), align 4
-  %b2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2 ), align 4
-  %b3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3 ), align 4
-  %b4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4 ), align 4
-  %b5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5 ), align 4
-  %b6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6 ), align 4
-  %b7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7 ), align 4
-  %b8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8 ), align 4
-  %b9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9 ), align 4
-  %b10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
-  %b11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
-  %b12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
-  %b13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
-  %b14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
-  %b15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
+  %a0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+  %a1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+  %a2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+  %a3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+  %a4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+  %a5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+  %a6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+  %a7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+  %a8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+  %a9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+  %b0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+  %b1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+  %b2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+  %b3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+  %b4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+  %b5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+  %b6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+  %b7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+  %b8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+  %b9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+  %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+  %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+  %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+  %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+  %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+  %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
   %r0  = shl i32 %a0 , %b0
   %r1  = shl i32 %a1 , %b1
   %r2  = shl i32 %a2 , %b2
@@ -249,138 +249,138 @@ define void @shl_v16i32() {
   %r13 = shl i32 %a13, %b13
   %r14 = shl i32 %a14, %b14
   %r15 = shl i32 %a15, %b15
-  store i32 %r0 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0 ), align 4
-  store i32 %r1 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1 ), align 4
-  store i32 %r2 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2 ), align 4
-  store i32 %r3 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3 ), align 4
-  store i32 %r4 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4 ), align 4
-  store i32 %r5 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5 ), align 4
-  store i32 %r6 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6 ), align 4
-  store i32 %r7 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7 ), align 4
-  store i32 %r8 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8 ), align 4
-  store i32 %r9 , i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9 ), align 4
-  store i32 %r10, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
-  store i32 %r11, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
-  store i32 %r12, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
-  store i32 %r13, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
-  store i32 %r14, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
-  store i32 %r15, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
+  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @shl_v32i16() {
 ; SSE-LABEL: @shl_v32i16(
-; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @a16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @b16 to <8 x i16>*), align 2
+; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
 ; SSE-NEXT:    [[TMP3:%.*]] = shl <8 x i16> [[TMP1]], [[TMP2]]
-; SSE-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([32 x i16]* @c16 to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SSE-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
 ; SSE-NEXT:    [[TMP6:%.*]] = shl <8 x i16> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SSE-NEXT:    [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; SSE-NEXT:    [[TMP9:%.*]] = shl <8 x i16> [[TMP7]], [[TMP8]]
-; SSE-NEXT:    store <8 x i16> [[TMP9]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24) to <8 x i16>*), align 2
-; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SSE-NEXT:    [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
 ; SSE-NEXT:    [[TMP12:%.*]] = shl <8 x i16> [[TMP10]], [[TMP11]]
-; SSE-NEXT:    store <8 x i16> [[TMP12]], <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24) to <8 x i16>*), align 2
+; SSE-NEXT:    store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @shl_v32i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
+; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
+; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
 ; AVX-NEXT:    [[TMP3:%.*]] = shl <16 x i16> [[TMP1]], [[TMP2]]
-; AVX-NEXT:    store <16 x i16> [[TMP3]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
-; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP3]], ptr @c16, align 2
+; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; AVX-NEXT:    [[TMP6:%.*]] = shl <16 x i16> [[TMP4]], [[TMP5]]
-; AVX-NEXT:    store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
+; AVX-NEXT:    store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @shl_v32i16(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @a16 to <32 x i16>*), align 2
-; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, <32 x i16>* bitcast ([32 x i16]* @b16 to <32 x i16>*), align 2
+; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
+; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
 ; AVX512-NEXT:    [[TMP3:%.*]] = shl <32 x i16> [[TMP1]], [[TMP2]]
-; AVX512-NEXT:    store <32 x i16> [[TMP3]], <32 x i16>* bitcast ([32 x i16]* @c16 to <32 x i16>*), align 2
+; AVX512-NEXT:    store <32 x i16> [[TMP3]], ptr @c16, align 2
 ; AVX512-NEXT:    ret void
 ;
 ; XOP-LABEL: @shl_v32i16(
-; XOP-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @a16 to <16 x i16>*), align 2
-; XOP-NEXT:    [[TMP2:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @b16 to <16 x i16>*), align 2
+; XOP-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
+; XOP-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
 ; XOP-NEXT:    [[TMP3:%.*]] = shl <16 x i16> [[TMP1]], [[TMP2]]
-; XOP-NEXT:    store <16 x i16> [[TMP3]], <16 x i16>* bitcast ([32 x i16]* @c16 to <16 x i16>*), align 2
-; XOP-NEXT:    [[TMP4:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16) to <16 x i16>*), align 2
-; XOP-NEXT:    [[TMP5:%.*]] = load <16 x i16>, <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16) to <16 x i16>*), align 2
+; XOP-NEXT:    store <16 x i16> [[TMP3]], ptr @c16, align 2
+; XOP-NEXT:    [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; XOP-NEXT:    [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
 ; XOP-NEXT:    [[TMP6:%.*]] = shl <16 x i16> [[TMP4]], [[TMP5]]
-; XOP-NEXT:    store <16 x i16> [[TMP6]], <16 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16) to <16 x i16>*), align 2
+; XOP-NEXT:    store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
 ; XOP-NEXT:    ret void
 ;
-  %a0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 0 ), align 2
-  %a1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 1 ), align 2
-  %a2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 2 ), align 2
-  %a3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 3 ), align 2
-  %a4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 4 ), align 2
-  %a5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 5 ), align 2
-  %a6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 6 ), align 2
-  %a7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 7 ), align 2
-  %a8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 8 ), align 2
-  %a9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 9 ), align 2
-  %a10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 10), align 2
-  %a11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 11), align 2
-  %a12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 12), align 2
-  %a13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 13), align 2
-  %a14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 14), align 2
-  %a15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 15), align 2
-  %a16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 16), align 2
-  %a17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 17), align 2
-  %a18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 18), align 2
-  %a19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 19), align 2
-  %a20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 20), align 2
-  %a21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 21), align 2
-  %a22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 22), align 2
-  %a23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 23), align 2
-  %a24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 24), align 2
-  %a25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 25), align 2
-  %a26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 26), align 2
-  %a27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 27), align 2
-  %a28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 28), align 2
-  %a29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 29), align 2
-  %a30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 30), align 2
-  %a31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a16, i32 0, i64 31), align 2
-  %b0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 0 ), align 2
-  %b1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 1 ), align 2
-  %b2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 2 ), align 2
-  %b3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 3 ), align 2
-  %b4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 4 ), align 2
-  %b5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 5 ), align 2
-  %b6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 6 ), align 2
-  %b7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 7 ), align 2
-  %b8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 8 ), align 2
-  %b9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 9 ), align 2
-  %b10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 10), align 2
-  %b11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 11), align 2
-  %b12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 12), align 2
-  %b13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 13), align 2
-  %b14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 14), align 2
-  %b15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 15), align 2
-  %b16 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 16), align 2
-  %b17 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 17), align 2
-  %b18 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 18), align 2
-  %b19 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 19), align 2
-  %b20 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 20), align 2
-  %b21 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 21), align 2
-  %b22 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 22), align 2
-  %b23 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 23), align 2
-  %b24 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 24), align 2
-  %b25 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 25), align 2
-  %b26 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 26), align 2
-  %b27 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 27), align 2
-  %b28 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 28), align 2
-  %b29 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 29), align 2
-  %b30 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 30), align 2
-  %b31 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @b16, i32 0, i64 31), align 2
+  %a0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+  %a1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+  %a2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+  %a3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+  %a4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+  %a5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+  %a6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+  %a7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+  %a8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+  %a9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+  %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+  %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+  %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+  %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+  %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+  %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+  %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+  %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+  %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+  %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+  %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+  %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+  %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+  %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+  %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+  %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+  %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+  %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+  %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+  %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+  %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+  %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+  %b0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+  %b1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+  %b2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+  %b3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+  %b4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+  %b5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+  %b6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+  %b7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+  %b8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+  %b9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+  %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+  %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+  %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+  %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+  %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+  %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+  %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+  %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+  %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+  %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+  %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+  %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+  %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+  %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+  %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+  %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+  %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+  %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+  %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+  %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+  %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+  %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
   %r0  = shl i16 %a0 , %b0
   %r1  = shl i16 %a1 , %b1
   %r2  = shl i16 %a2 , %b2
@@ -413,218 +413,218 @@ define void @shl_v32i16() {
   %r29 = shl i16 %a29, %b29
   %r30 = shl i16 %a30, %b30
   %r31 = shl i16 %a31, %b31
-  store i16 %r0 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 0 ), align 2
-  store i16 %r1 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 1 ), align 2
-  store i16 %r2 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 2 ), align 2
-  store i16 %r3 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 3 ), align 2
-  store i16 %r4 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 4 ), align 2
-  store i16 %r5 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 5 ), align 2
-  store i16 %r6 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 6 ), align 2
-  store i16 %r7 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 7 ), align 2
-  store i16 %r8 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 8 ), align 2
-  store i16 %r9 , i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 9 ), align 2
-  store i16 %r10, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 10), align 2
-  store i16 %r11, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 11), align 2
-  store i16 %r12, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 12), align 2
-  store i16 %r13, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 13), align 2
-  store i16 %r14, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 14), align 2
-  store i16 %r15, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 15), align 2
-  store i16 %r16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 16), align 2
-  store i16 %r17, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 17), align 2
-  store i16 %r18, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 18), align 2
-  store i16 %r19, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 19), align 2
-  store i16 %r20, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 20), align 2
-  store i16 %r21, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 21), align 2
-  store i16 %r22, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 22), align 2
-  store i16 %r23, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 23), align 2
-  store i16 %r24, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 24), align 2
-  store i16 %r25, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 25), align 2
-  store i16 %r26, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 26), align 2
-  store i16 %r27, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 27), align 2
-  store i16 %r28, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 28), align 2
-  store i16 %r29, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 29), align 2
-  store i16 %r30, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 30), align 2
-  store i16 %r31, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @c16, i32 0, i64 31), align 2
+  store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+  store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+  store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+  store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+  store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+  store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+  store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+  store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+  store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+  store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+  store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+  store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+  store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+  store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+  store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+  store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+  store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+  store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+  store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+  store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+  store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+  store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+  store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+  store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+  store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+  store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+  store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+  store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+  store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+  store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+  store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+  store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
   ret void
 }
 
 define void @shl_v64i8() {
 ; SSE-LABEL: @shl_v64i8(
-; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @a8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @b8 to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
 ; SSE-NEXT:    [[TMP3:%.*]] = shl <16 x i8> [[TMP1]], [[TMP2]]
-; SSE-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([64 x i8]* @c8 to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
 ; SSE-NEXT:    [[TMP6:%.*]] = shl <16 x i8> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; SSE-NEXT:    [[TMP9:%.*]] = shl <16 x i8> [[TMP7]], [[TMP8]]
-; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48) to <16 x i8>*), align 1
-; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
 ; SSE-NEXT:    [[TMP12:%.*]] = shl <16 x i8> [[TMP10]], [[TMP11]]
-; SSE-NEXT:    store <16 x i8> [[TMP9]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <16 x i8>*), align 1
-; SSE-NEXT:    store <16 x i8> [[TMP12]], <16 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48) to <16 x i8>*), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SSE-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @shl_v64i8(
-; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @a8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @b8 to <32 x i8>*), align 1
+; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = shl <32 x i8> [[TMP1]], [[TMP2]]
-; AVX-NEXT:    store <32 x i8> [[TMP3]], <32 x i8>* bitcast ([64 x i8]* @c8 to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <32 x i8>*), align 1
-; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP3]], ptr @c8, align 1
+; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; AVX-NEXT:    [[TMP6:%.*]] = shl <32 x i8> [[TMP4]], [[TMP5]]
-; AVX-NEXT:    store <32 x i8> [[TMP6]], <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <32 x i8>*), align 1
+; AVX-NEXT:    store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @shl_v64i8(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @a8 to <64 x i8>*), align 1
-; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, <64 x i8>* bitcast ([64 x i8]* @b8 to <64 x i8>*), align 1
+; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
+; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
 ; AVX512-NEXT:    [[TMP3:%.*]] = shl <64 x i8> [[TMP1]], [[TMP2]]
-; AVX512-NEXT:    store <64 x i8> [[TMP3]], <64 x i8>* bitcast ([64 x i8]* @c8 to <64 x i8>*), align 1
+; AVX512-NEXT:    store <64 x i8> [[TMP3]], ptr @c8, align 1
 ; AVX512-NEXT:    ret void
 ;
 ; XOP-LABEL: @shl_v64i8(
-; XOP-NEXT:    [[TMP1:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @a8 to <32 x i8>*), align 1
-; XOP-NEXT:    [[TMP2:%.*]] = load <32 x i8>, <32 x i8>* bitcast ([64 x i8]* @b8 to <32 x i8>*), align 1
+; XOP-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
+; XOP-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
 ; XOP-NEXT:    [[TMP3:%.*]] = shl <32 x i8> [[TMP1]], [[TMP2]]
-; XOP-NEXT:    store <32 x i8> [[TMP3]], <32 x i8>* bitcast ([64 x i8]* @c8 to <32 x i8>*), align 1
-; XOP-NEXT:    [[TMP4:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32) to <32 x i8>*), align 1
-; XOP-NEXT:    [[TMP5:%.*]] = load <32 x i8>, <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32) to <32 x i8>*), align 1
+; XOP-NEXT:    store <32 x i8> [[TMP3]], ptr @c8, align 1
+; XOP-NEXT:    [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; XOP-NEXT:    [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
 ; XOP-NEXT:    [[TMP6:%.*]] = shl <32 x i8> [[TMP4]], [[TMP5]]
-; XOP-NEXT:    store <32 x i8> [[TMP6]], <32 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32) to <32 x i8>*), align 1
+; XOP-NEXT:    store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
 ; XOP-NEXT:    ret void
 ;
-  %a0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 0 ), align 1
-  %a1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 1 ), align 1
-  %a2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 2 ), align 1
-  %a3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 3 ), align 1
-  %a4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 4 ), align 1
-  %a5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 5 ), align 1
-  %a6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 6 ), align 1
-  %a7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 7 ), align 1
-  %a8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 8 ), align 1
-  %a9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 9 ), align 1
-  %a10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 10), align 1
-  %a11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 11), align 1
-  %a12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 12), align 1
-  %a13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 13), align 1
-  %a14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 14), align 1
-  %a15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 15), align 1
-  %a16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 16), align 1
-  %a17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 17), align 1
-  %a18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 18), align 1
-  %a19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 19), align 1
-  %a20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 20), align 1
-  %a21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 21), align 1
-  %a22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 22), align 1
-  %a23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 23), align 1
-  %a24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 24), align 1
-  %a25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 25), align 1
-  %a26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 26), align 1
-  %a27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 27), align 1
-  %a28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 28), align 1
-  %a29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 29), align 1
-  %a30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 30), align 1
-  %a31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 31), align 1
-  %a32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 32), align 1
-  %a33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 33), align 1
-  %a34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 34), align 1
-  %a35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 35), align 1
-  %a36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 36), align 1
-  %a37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 37), align 1
-  %a38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 38), align 1
-  %a39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 39), align 1
-  %a40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 40), align 1
-  %a41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 41), align 1
-  %a42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 42), align 1
-  %a43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 43), align 1
-  %a44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 44), align 1
-  %a45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 45), align 1
-  %a46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 46), align 1
-  %a47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 47), align 1
-  %a48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 48), align 1
-  %a49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 49), align 1
-  %a50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 50), align 1
-  %a51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 51), align 1
-  %a52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 52), align 1
-  %a53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 53), align 1
-  %a54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 54), align 1
-  %a55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 55), align 1
-  %a56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 56), align 1
-  %a57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 57), align 1
-  %a58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 58), align 1
-  %a59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 59), align 1
-  %a60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 60), align 1
-  %a61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 61), align 1
-  %a62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 62), align 1
-  %a63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @a8, i32 0, i64 63), align 1
-  %b0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 0 ), align 1
-  %b1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 1 ), align 1
-  %b2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 2 ), align 1
-  %b3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 3 ), align 1
-  %b4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 4 ), align 1
-  %b5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 5 ), align 1
-  %b6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 6 ), align 1
-  %b7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 7 ), align 1
-  %b8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 8 ), align 1
-  %b9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 9 ), align 1
-  %b10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 10), align 1
-  %b11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 11), align 1
-  %b12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 12), align 1
-  %b13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 13), align 1
-  %b14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 14), align 1
-  %b15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 15), align 1
-  %b16 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 16), align 1
-  %b17 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 17), align 1
-  %b18 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 18), align 1
-  %b19 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 19), align 1
-  %b20 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 20), align 1
-  %b21 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 21), align 1
-  %b22 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 22), align 1
-  %b23 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 23), align 1
-  %b24 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 24), align 1
-  %b25 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 25), align 1
-  %b26 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 26), align 1
-  %b27 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 27), align 1
-  %b28 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 28), align 1
-  %b29 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 29), align 1
-  %b30 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 30), align 1
-  %b31 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 31), align 1
-  %b32 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 32), align 1
-  %b33 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 33), align 1
-  %b34 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 34), align 1
-  %b35 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 35), align 1
-  %b36 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 36), align 1
-  %b37 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 37), align 1
-  %b38 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 38), align 1
-  %b39 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 39), align 1
-  %b40 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 40), align 1
-  %b41 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 41), align 1
-  %b42 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 42), align 1
-  %b43 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 43), align 1
-  %b44 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 44), align 1
-  %b45 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 45), align 1
-  %b46 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 46), align 1
-  %b47 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 47), align 1
-  %b48 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 48), align 1
-  %b49 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 49), align 1
-  %b50 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 50), align 1
-  %b51 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 51), align 1
-  %b52 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 52), align 1
-  %b53 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 53), align 1
-  %b54 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 54), align 1
-  %b55 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 55), align 1
-  %b56 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 56), align 1
-  %b57 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 57), align 1
-  %b58 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 58), align 1
-  %b59 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 59), align 1
-  %b60 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 60), align 1
-  %b61 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 61), align 1
-  %b62 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 62), align 1
-  %b63 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @b8, i32 0, i64 63), align 1
+  %a0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+  %a1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+  %a2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+  %a3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+  %a4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+  %a5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+  %a6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+  %a7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+  %a8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+  %a9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+  %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+  %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+  %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+  %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+  %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+  %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+  %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+  %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+  %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+  %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+  %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+  %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+  %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+  %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+  %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+  %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+  %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+  %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+  %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+  %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+  %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+  %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+  %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+  %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+  %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+  %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+  %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+  %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+  %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+  %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+  %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+  %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+  %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+  %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+  %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+  %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+  %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+  %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+  %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+  %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+  %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+  %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+  %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+  %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+  %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+  %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+  %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+  %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+  %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+  %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+  %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+  %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+  %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+  %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+  %b0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+  %b1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+  %b2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+  %b3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+  %b4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+  %b5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+  %b6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+  %b7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+  %b8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+  %b9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+  %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+  %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+  %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+  %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+  %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+  %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+  %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+  %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+  %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+  %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+  %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+  %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+  %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+  %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+  %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+  %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+  %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+  %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+  %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+  %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+  %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+  %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+  %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+  %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+  %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+  %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+  %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+  %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+  %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+  %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+  %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+  %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+  %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+  %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+  %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+  %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+  %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+  %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+  %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+  %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+  %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+  %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+  %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+  %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+  %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+  %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+  %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+  %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+  %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+  %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+  %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+  %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+  %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+  %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
   %r0  = shl i8 %a0 , %b0
   %r1  = shl i8 %a1 , %b1
   %r2  = shl i8 %a2 , %b2
@@ -689,69 +689,69 @@ define void @shl_v64i8() {
   %r61 = shl i8 %a61, %b61
   %r62 = shl i8 %a62, %b62
   %r63 = shl i8 %a63, %b63
-  store i8 %r0 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 0 ), align 1
-  store i8 %r1 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 1 ), align 1
-  store i8 %r2 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 2 ), align 1
-  store i8 %r3 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 3 ), align 1
-  store i8 %r4 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 4 ), align 1
-  store i8 %r5 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 5 ), align 1
-  store i8 %r6 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 6 ), align 1
-  store i8 %r7 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 7 ), align 1
-  store i8 %r8 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 8 ), align 1
-  store i8 %r9 , i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 9 ), align 1
-  store i8 %r10, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 10), align 1
-  store i8 %r11, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 11), align 1
-  store i8 %r12, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 12), align 1
-  store i8 %r13, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 13), align 1
-  store i8 %r14, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 14), align 1
-  store i8 %r15, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 15), align 1
-  store i8 %r16, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 16), align 1
-  store i8 %r17, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 17), align 1
-  store i8 %r18, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 18), align 1
-  store i8 %r19, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 19), align 1
-  store i8 %r20, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 20), align 1
-  store i8 %r21, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 21), align 1
-  store i8 %r22, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 22), align 1
-  store i8 %r23, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 23), align 1
-  store i8 %r24, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 24), align 1
-  store i8 %r25, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 25), align 1
-  store i8 %r26, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 26), align 1
-  store i8 %r27, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 27), align 1
-  store i8 %r28, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 28), align 1
-  store i8 %r29, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 29), align 1
-  store i8 %r30, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 30), align 1
-  store i8 %r31, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 31), align 1
-  store i8 %r32, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 32), align 1
-  store i8 %r33, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 33), align 1
-  store i8 %r34, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 34), align 1
-  store i8 %r35, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 35), align 1
-  store i8 %r36, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 36), align 1
-  store i8 %r37, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 37), align 1
-  store i8 %r38, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 38), align 1
-  store i8 %r39, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 39), align 1
-  store i8 %r40, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 40), align 1
-  store i8 %r41, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 41), align 1
-  store i8 %r42, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 42), align 1
-  store i8 %r43, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 43), align 1
-  store i8 %r44, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 44), align 1
-  store i8 %r45, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 45), align 1
-  store i8 %r46, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 46), align 1
-  store i8 %r47, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 47), align 1
-  store i8 %r48, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 48), align 1
-  store i8 %r49, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 49), align 1
-  store i8 %r50, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 50), align 1
-  store i8 %r51, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 51), align 1
-  store i8 %r52, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 52), align 1
-  store i8 %r53, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 53), align 1
-  store i8 %r54, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 54), align 1
-  store i8 %r55, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 55), align 1
-  store i8 %r56, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 56), align 1
-  store i8 %r57, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 57), align 1
-  store i8 %r58, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 58), align 1
-  store i8 %r59, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 59), align 1
-  store i8 %r60, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 60), align 1
-  store i8 %r61, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 61), align 1
-  store i8 %r62, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 62), align 1
-  store i8 %r63, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @c8, i32 0, i64 63), align 1
+  store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+  store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+  store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+  store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+  store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+  store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+  store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+  store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+  store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+  store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+  store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+  store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+  store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+  store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+  store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+  store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+  store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+  store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+  store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+  store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+  store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+  store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+  store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+  store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+  store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+  store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+  store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+  store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+  store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+  store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+  store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+  store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+  store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+  store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+  store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+  store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+  store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+  store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+  store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+  store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+  store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+  store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+  store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+  store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+  store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+  store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+  store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+  store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+  store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+  store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+  store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+  store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+  store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+  store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+  store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+  store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+  store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+  store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+  store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+  store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+  store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+  store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+  store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+  store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
   ret void
 }

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll
index 5cc9e5fdbf37e..9637e40d2b3e2 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll
@@ -1,13 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes=slp-vectorizer -S -o - -mtriple=x86_64-unknown-linux-gnu -mcpu=haswell < %s | FileCheck %s
 
-define void @wombat(i32* %ptr, i32* %ptr1) {
+define void @wombat(ptr %ptr, ptr %ptr1) {
 ; CHECK-LABEL: @wombat(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 0
-; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[PTR1:%.*]], i32 3
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[TMP8]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[PTR1:%.*]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[PTR:%.*]], align 8
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
 ; CHECK-NEXT:    [[SHRINK_SHUFFLE:%.*]] = shufflevector <4 x i32> [[SHUFFLE]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <2 x i32> [[SHRINK_SHUFFLE]], <i32 -1, i32 -1>
@@ -15,45 +13,43 @@ define void @wombat(i32* %ptr, i32* %ptr1) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], undef
 ; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> undef, <4 x i32> [[SHUFFLE1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP27]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 8
+; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[TMP27]], align 8
 ; CHECK-NEXT:    ret void
 ;
 bb:
-  %tmp7 = getelementptr inbounds i32, i32* %ptr, i64 1
-  %tmp8 = getelementptr inbounds i32, i32* %ptr, i64 0
-  %tmp12 = load i32, i32* %tmp7, align 4
-  %tmp13 = load i32, i32* %tmp8, align 8
+  %tmp7 = getelementptr inbounds i32, ptr %ptr, i64 1
+  %tmp12 = load i32, ptr %tmp7, align 4
+  %tmp13 = load i32, ptr %ptr, align 8
   %tmp21 = add nsw i32 %tmp12, -1
   %tmp22 = fptosi float undef to i32
   %tmp23 = icmp slt i32 %tmp22, 0
   %tmp24 = icmp sgt i32 %tmp12, %tmp22
   %tmp25 = select i1 %tmp24, i32 %tmp22, i32 %tmp21
   %tmp26 = select i1 %tmp23, i32 0, i32 %tmp25
-  %tmp27 = getelementptr inbounds i32, i32* %ptr1, i32 3
-  store i32 %tmp26, i32* %tmp27, align 8
+  %tmp27 = getelementptr inbounds i32, ptr %ptr1, i32 3
+  store i32 %tmp26, ptr %tmp27, align 8
   %tmp28 = add nsw i32 %tmp13, -1
   %tmp29 = fptosi float undef to i32
   %tmp30 = icmp slt i32 %tmp29, 0
   %tmp31 = icmp sgt i32 %tmp13, %tmp29
   %tmp32 = select i1 %tmp31, i32 %tmp29, i32 %tmp28
   %tmp33 = select i1 %tmp30, i32 0, i32 %tmp32
-  %tmp34 = getelementptr inbounds i32, i32* %ptr1, i32 4
-  store i32 %tmp33, i32* %tmp34, align 4
+  %tmp34 = getelementptr inbounds i32, ptr %ptr1, i32 4
+  store i32 %tmp33, ptr %tmp34, align 4
   %tmp35 = fptosi float undef to i32
   %tmp36 = icmp slt i32 %tmp35, 0
   %tmp37 = icmp sgt i32 %tmp12, %tmp35
   %tmp38 = select i1 %tmp37, i32 %tmp35, i32 %tmp21
   %tmp39 = select i1 %tmp36, i32 0, i32 %tmp38
-  %tmp40 = getelementptr inbounds i32, i32* %ptr1, i32 5
-  store i32 %tmp39, i32* %tmp40, align 8
+  %tmp40 = getelementptr inbounds i32, ptr %ptr1, i32 5
+  store i32 %tmp39, ptr %tmp40, align 8
   %tmp41 = fptosi float undef to i32
   %tmp42 = icmp slt i32 %tmp41, 0
   %tmp43 = icmp sgt i32 %tmp13, %tmp41
   %tmp44 = select i1 %tmp43, i32 %tmp41, i32 %tmp28
   %tmp45 = select i1 %tmp42, i32 0, i32 %tmp44
-  %tmp46 = getelementptr inbounds i32, i32* %ptr1, i32 6
-  store i32 %tmp45, i32* %tmp46, align 4
+  %tmp46 = getelementptr inbounds i32, ptr %ptr1, i32 6
+  store i32 %tmp45, ptr %tmp46, align 4
   ret void
 }
 
@@ -66,7 +62,7 @@ define internal i32 @ipvideo_decode_block_opcode_0xD_16() {
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[TMP0]], <2 x i16> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    store <8 x i16> [[SHUFFLE]], <8 x i16>* undef, align 2
+; CHECK-NEXT:    store <8 x i16> [[SHUFFLE]], ptr undef, align 2
 ; CHECK-NEXT:    [[SHRINK_SHUFFLE]] = shufflevector <8 x i16> [[SHUFFLE]], <8 x i16> poison, <2 x i32> <i32 0, i32 4>
 ; CHECK-NEXT:    br label [[FOR_BODY]]
 ;
@@ -79,20 +75,20 @@ for.body:                                         ; preds = %if.end, %entry
   br label %if.end
 
 if.end:                                           ; preds = %for.body
-  store i16 %P.sroa.0.0, i16* undef, align 2
-  %arrayidx11.1 = getelementptr inbounds i16, i16* undef, i32 1
-  store i16 %P.sroa.0.0, i16* %arrayidx11.1, align 2
-  %arrayidx11.2 = getelementptr inbounds i16, i16* undef, i32 2
-  store i16 %P.sroa.0.0, i16* %arrayidx11.2, align 2
-  %arrayidx11.3 = getelementptr inbounds i16, i16* undef, i32 3
-  store i16 %P.sroa.0.0, i16* %arrayidx11.3, align 2
-  %arrayidx11.4 = getelementptr inbounds i16, i16* undef, i32 4
-  store i16 %P.sroa.7.0, i16* %arrayidx11.4, align 2
-  %arrayidx11.5 = getelementptr inbounds i16, i16* undef, i32 5
-  store i16 %P.sroa.7.0, i16* %arrayidx11.5, align 2
-  %arrayidx11.6 = getelementptr inbounds i16, i16* undef, i32 6
-  store i16 %P.sroa.7.0, i16* %arrayidx11.6, align 2
-  %arrayidx11.7 = getelementptr inbounds i16, i16* undef, i32 7
-  store i16 %P.sroa.7.0, i16* %arrayidx11.7, align 2
+  store i16 %P.sroa.0.0, ptr undef, align 2
+  %arrayidx11.1 = getelementptr inbounds i16, ptr undef, i32 1
+  store i16 %P.sroa.0.0, ptr %arrayidx11.1, align 2
+  %arrayidx11.2 = getelementptr inbounds i16, ptr undef, i32 2
+  store i16 %P.sroa.0.0, ptr %arrayidx11.2, align 2
+  %arrayidx11.3 = getelementptr inbounds i16, ptr undef, i32 3
+  store i16 %P.sroa.0.0, ptr %arrayidx11.3, align 2
+  %arrayidx11.4 = getelementptr inbounds i16, ptr undef, i32 4
+  store i16 %P.sroa.7.0, ptr %arrayidx11.4, align 2
+  %arrayidx11.5 = getelementptr inbounds i16, ptr undef, i32 5
+  store i16 %P.sroa.7.0, ptr %arrayidx11.5, align 2
+  %arrayidx11.6 = getelementptr inbounds i16, ptr undef, i32 6
+  store i16 %P.sroa.7.0, ptr %arrayidx11.6, align 2
+  %arrayidx11.7 = getelementptr inbounds i16, ptr undef, i32 7
+  store i16 %P.sroa.7.0, ptr %arrayidx11.7, align 2
   br label %for.body
 }

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll
index 0f5790e8ad660..d70d54183a91c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll
@@ -4,11 +4,9 @@
 %class.e = type { i32, i32 }
 %struct.a = type { i32, i32, i32, i32 }
 
-define void @foo(%class.e* %this, %struct.a* %p, i32 %add7) {
+define void @foo(ptr %this, ptr %p, i32 %add7) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_A:%.*]], %struct.a* [[P:%.*]], i64 0, i32 0
-; CHECK-NEXT:    [[G:%.*]] = getelementptr inbounds [[CLASS_E:%.*]], %class.e* [[THIS:%.*]], i64 0, i32 0
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 undef>, i32 [[ADD7:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = sdiv <2 x i32> [[TMP0]], <i32 2, i32 2>
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
@@ -19,8 +17,7 @@ define void @foo(%class.e* %this, %struct.a* %p, i32 %add7) {
 ; CHECK:       sw.bb:
 ; CHECK-NEXT:    [[SHRINK_SHUFFLE:%.*]] = shufflevector <4 x i32> [[SHUFFLE]], <4 x i32> poison, <2 x i32> <i32 2, i32 0>
 ; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i32> [[SHRINK_SHUFFLE]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[G]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[THIS:%.*]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i32> [[TMP4]], [[TMP2]]
 ; CHECK-NEXT:    br label [[SW_EPILOG]]
 ; CHECK:       sw.epilog:
@@ -28,17 +25,14 @@ define void @foo(%class.e* %this, %struct.a* %p, i32 %add7) {
 ; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub <4 x i32> undef, [[SHUFFLE]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i32> [[TMP7]], [[SHUFFLE1]]
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32* [[B]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* [[TMP9]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP8]], ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %b = getelementptr inbounds %struct.a, %struct.a* %p, i64 0, i32 0
-  %c = getelementptr inbounds %struct.a, %struct.a* %p, i64 0, i32 1
-  %o = getelementptr inbounds %struct.a, %struct.a* %p, i64 0, i32 2
-  %d = getelementptr inbounds %struct.a, %struct.a* %p, i64 0, i32 3
-  %h = getelementptr inbounds %class.e, %class.e* %this, i64 0, i32 1
-  %g = getelementptr inbounds %class.e, %class.e* %this, i64 0, i32 0
+  %c = getelementptr inbounds %struct.a, ptr %p, i64 0, i32 1
+  %o = getelementptr inbounds %struct.a, ptr %p, i64 0, i32 2
+  %d = getelementptr inbounds %struct.a, ptr %p, i64 0, i32 3
+  %h = getelementptr inbounds %class.e, ptr %this, i64 0, i32 1
   %div = sdiv i32 undef, 2
   %div8 = sdiv i32 %add7, 2
   switch i32 undef, label %sw.epilog [
@@ -47,10 +41,10 @@ entry:
   ]
 
 sw.bb:
-  %0 = load i32, i32* %h, align 4
+  %0 = load i32, ptr %h, align 4
   %1 = xor i32 %div, -1
   %sub10 = add i32 %0, %1
-  %2 = load i32, i32* %g, align 4
+  %2 = load i32, ptr %this, align 4
   %3 = xor i32 %div8, -1
   %sub13 = add i32 %2, %3
   br label %sw.epilog
@@ -60,15 +54,15 @@ sw.epilog:
   %m.0 = phi i32 [ undef, %entry ], [ %sub13, %sw.bb ]
   %add15 = sub i32 undef, %div
   %sub16 = add i32 %add15, %l.0
-  store i32 %sub16, i32* %b, align 4
+  store i32 %sub16, ptr %p, align 4
   %add19 = sub i32 undef, %div
   %sub20 = add i32 %add19, %l.0
-  store i32 %sub20, i32* %c, align 4
+  store i32 %sub20, ptr %c, align 4
   %add23 = sub i32 undef, %div8
   %sub24 = add i32 %add23, %m.0
-  store i32 %sub24, i32* %o, align 4
+  store i32 %sub24, ptr %o, align 4
   %add27 = sub i32 undef, %div8
   %sub28 = add i32 %add27, %m.0
-  store i32 %sub28, i32* %d, align 4
+  store i32 %sub28, ptr %d, align 4
   ret void
 }

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/shuffled-gathers-diff-size.ll b/llvm/test/Transforms/SLPVectorizer/X86/shuffled-gathers-diff-size.ll
index 332ac1f1876d3..23bada8f7790d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shuffled-gathers-diff-size.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shuffled-gathers-diff-size.ll
@@ -1,73 +1,73 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -slp-threshold=-2 | FileCheck %s
 
-define void @foo(i32* noalias nocapture writeonly %B, i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %C, i32 %n, i32 %m) {
+define void @foo(ptr noalias nocapture writeonly %B, ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %C, i32 %n, i32 %m) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP0]], [[N:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[C:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[MUL2:%.*]] = mul nsw i32 [[TMP1]], [[M:%.*]]
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[MUL2]], [[MUL]]
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
 ; CHECK-NEXT:    [[MUL4:%.*]] = mul nsw i32 [[ADD]], [[TMP2]]
-; CHECK-NEXT:    store i32 [[MUL4]], i32* [[B:%.*]], align 4
-; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 1
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    store i32 [[MUL4]], ptr [[B:%.*]], align 4
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4
 ; CHECK-NEXT:    [[MUL9:%.*]] = mul nsw i32 [[TMP3]], [[M]]
 ; CHECK-NEXT:    [[ADD10:%.*]] = add nsw i32 [[MUL9]], [[MUL]]
-; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 2
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX11]], align 4
+; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 2
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4
 ; CHECK-NEXT:    [[MUL12:%.*]] = mul nsw i32 [[ADD10]], [[TMP4]]
-; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 1
-; CHECK-NEXT:    store i32 [[MUL12]], i32* [[ARRAYIDX13]], align 4
+; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 1
+; CHECK-NEXT:    store i32 [[MUL12]], ptr [[ARRAYIDX13]], align 4
 ; CHECK-NEXT:    [[MUL15:%.*]] = mul nsw i32 [[TMP2]], [[N]]
 ; CHECK-NEXT:    [[MUL17:%.*]] = mul nsw i32 [[TMP4]], [[M]]
 ; CHECK-NEXT:    [[ADD18:%.*]] = add nsw i32 [[MUL17]], [[MUL15]]
 ; CHECK-NEXT:    [[MUL20:%.*]] = mul nsw i32 [[ADD18]], [[TMP0]]
-; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
-; CHECK-NEXT:    store i32 [[MUL20]], i32* [[ARRAYIDX21]], align 4
-; CHECK-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 3
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX24]], align 4
+; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 2
+; CHECK-NEXT:    store i32 [[MUL20]], ptr [[ARRAYIDX21]], align 4
+; CHECK-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 3
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX24]], align 4
 ; CHECK-NEXT:    [[MUL25:%.*]] = mul nsw i32 [[TMP5]], [[M]]
 ; CHECK-NEXT:    [[ADD26:%.*]] = add nsw i32 [[MUL25]], [[MUL15]]
 ; CHECK-NEXT:    [[MUL28:%.*]] = mul nsw i32 [[ADD26]], [[TMP1]]
-; CHECK-NEXT:    [[ARRAYIDX29:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
-; CHECK-NEXT:    store i32 [[MUL28]], i32* [[ARRAYIDX29]], align 4
+; CHECK-NEXT:    [[ARRAYIDX29:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 3
+; CHECK-NEXT:    store i32 [[MUL28]], ptr [[ARRAYIDX29]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load i32, i32* %A, align 4
+  %0 = load i32, ptr %A, align 4
   %mul = mul nsw i32 %0, %n
-  %1 = load i32, i32* %C, align 4
+  %1 = load i32, ptr %C, align 4
   %mul2 = mul nsw i32 %1, %m
   %add = add nsw i32 %mul2, %mul
-  %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 1
-  %2 = load i32, i32* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 1
+  %2 = load i32, ptr %arrayidx3, align 4
   %mul4 = mul nsw i32 %add, %2
-  store i32 %mul4, i32* %B, align 4
-  %arrayidx8 = getelementptr inbounds i32, i32* %C, i64 1
-  %3 = load i32, i32* %arrayidx8, align 4
+  store i32 %mul4, ptr %B, align 4
+  %arrayidx8 = getelementptr inbounds i32, ptr %C, i64 1
+  %3 = load i32, ptr %arrayidx8, align 4
   %mul9 = mul nsw i32 %3, %m
   %add10 = add nsw i32 %mul9, %mul
-  %arrayidx11 = getelementptr inbounds i32, i32* %C, i64 2
-  %4 = load i32, i32* %arrayidx11, align 4
+  %arrayidx11 = getelementptr inbounds i32, ptr %C, i64 2
+  %4 = load i32, ptr %arrayidx11, align 4
   %mul12 = mul nsw i32 %add10, %4
-  %arrayidx13 = getelementptr inbounds i32, i32* %B, i64 1
-  store i32 %mul12, i32* %arrayidx13, align 4
+  %arrayidx13 = getelementptr inbounds i32, ptr %B, i64 1
+  store i32 %mul12, ptr %arrayidx13, align 4
   %mul15 = mul nsw i32 %2, %n
   %mul17 = mul nsw i32 %4, %m
   %add18 = add nsw i32 %mul17, %mul15
   %mul20 = mul nsw i32 %add18, %0
-  %arrayidx21 = getelementptr inbounds i32, i32* %B, i64 2
-  store i32 %mul20, i32* %arrayidx21, align 4
-  %arrayidx24 = getelementptr inbounds i32, i32* %C, i64 3
-  %5 = load i32, i32* %arrayidx24, align 4
+  %arrayidx21 = getelementptr inbounds i32, ptr %B, i64 2
+  store i32 %mul20, ptr %arrayidx21, align 4
+  %arrayidx24 = getelementptr inbounds i32, ptr %C, i64 3
+  %5 = load i32, ptr %arrayidx24, align 4
   %mul25 = mul nsw i32 %5, %m
   %add26 = add nsw i32 %mul25, %mul15
   %mul28 = mul nsw i32 %add26, %1
-  %arrayidx29 = getelementptr inbounds i32, i32* %B, i64 3
-  store i32 %mul28, i32* %arrayidx29, align 4
+  %arrayidx29 = getelementptr inbounds i32, ptr %B, i64 3
+  store i32 %mul28, ptr %arrayidx29, align 4
   ret void
 }

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll b/llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll
index aea0210afd7a2..cc7b0dab26ed6 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll
@@ -4,21 +4,19 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-define i32 @rollable(i32* noalias nocapture %in, i32* noalias nocapture %out, i64 %n) {
+define i32 @rollable(ptr noalias nocapture %in, ptr noalias nocapture %out, i64 %n) {
 ; CHECK-LABEL: @rollable(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[N:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[DOT_CRIT_EDGE:%.*]], label [[DOTLR_PH:%.*]]
 ; CHECK:       .lr.ph:
 ; CHECK-NEXT:    [[I_019:%.*]] = phi i64 [ [[TMP10:%.*]], [[DOTLR_PH]] ], [ 0, [[TMP0:%.*]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[I_019]], 2
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = mul <4 x i32> [[TMP6]], <i32 7, i32 7, i32 7, i32 7>
 ; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i32> [[TMP7]], <i32 7, i32 14, i32 21, i32 28>
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* [[TMP9]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP8]], ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[TMP10]] = add i64 [[I_019]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[TMP10]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]]
@@ -31,17 +29,17 @@ define i32 @rollable(i32* noalias nocapture %in, i32* noalias nocapture %out, i6
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %i.019 = phi i64 [ %26, %.lr.ph ], [ 0, %0 ]
   %2 = shl i64 %i.019, 2
-  %3 = getelementptr inbounds i32, i32* %in, i64 %2
-  %4 = load i32, i32* %3, align 4
+  %3 = getelementptr inbounds i32, ptr %in, i64 %2
+  %4 = load i32, ptr %3, align 4
   %5 = or i64 %2, 1
-  %6 = getelementptr inbounds i32, i32* %in, i64 %5
-  %7 = load i32, i32* %6, align 4
+  %6 = getelementptr inbounds i32, ptr %in, i64 %5
+  %7 = load i32, ptr %6, align 4
   %8 = or i64 %2, 2
-  %9 = getelementptr inbounds i32, i32* %in, i64 %8
-  %10 = load i32, i32* %9, align 4
+  %9 = getelementptr inbounds i32, ptr %in, i64 %8
+  %10 = load i32, ptr %9, align 4
   %11 = or i64 %2, 3
-  %12 = getelementptr inbounds i32, i32* %in, i64 %11
-  %13 = load i32, i32* %12, align 4
+  %12 = getelementptr inbounds i32, ptr %in, i64 %11
+  %13 = load i32, ptr %12, align 4
   %14 = mul i32 %4, 7
   %15 = add i32 %14, 7
   %16 = mul i32 %7, 7
@@ -50,14 +48,14 @@ define i32 @rollable(i32* noalias nocapture %in, i32* noalias nocapture %out, i6
   %19 = add i32 %18, 21
   %20 = mul i32 %13, 7
   %21 = add i32 %20, 28
-  %22 = getelementptr inbounds i32, i32* %out, i64 %2
-  store i32 %15, i32* %22, align 4
-  %23 = getelementptr inbounds i32, i32* %out, i64 %5
-  store i32 %17, i32* %23, align 4
-  %24 = getelementptr inbounds i32, i32* %out, i64 %8
-  store i32 %19, i32* %24, align 4
-  %25 = getelementptr inbounds i32, i32* %out, i64 %11
-  store i32 %21, i32* %25, align 4
+  %22 = getelementptr inbounds i32, ptr %out, i64 %2
+  store i32 %15, ptr %22, align 4
+  %23 = getelementptr inbounds i32, ptr %out, i64 %5
+  store i32 %17, ptr %23, align 4
+  %24 = getelementptr inbounds i32, ptr %out, i64 %8
+  store i32 %19, ptr %24, align 4
+  %25 = getelementptr inbounds i32, ptr %out, i64 %11
+  store i32 %21, ptr %25, align 4
   %26 = add i64 %i.019, 1
   %exitcond = icmp eq i64 %26, %n
   br i1 %exitcond, label %._crit_edge, label %.lr.ph
@@ -66,31 +64,27 @@ define i32 @rollable(i32* noalias nocapture %in, i32* noalias nocapture %out, i6
   ret i32 undef
 }
 
-define i32 @unrollable(i32* %in, i32* %out, i64 %n) nounwind ssp uwtable {
+define i32 @unrollable(ptr %in, ptr %out, i64 %n) nounwind ssp uwtable {
 ; CHECK-LABEL: @unrollable(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[N:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[DOT_CRIT_EDGE:%.*]], label [[DOTLR_PH:%.*]]
 ; CHECK:       .lr.ph:
 ; CHECK-NEXT:    [[I_019:%.*]] = phi i64 [ [[TMP18:%.*]], [[DOTLR_PH]] ], [ 0, [[TMP0:%.*]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[I_019]], 2
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[TMP2]], 2
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[IN]], i64 [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul <2 x i32> [[TMP8]], <i32 7, i32 7>
 ; CHECK-NEXT:    [[TMP10:%.*]] = add <2 x i32> [[TMP9]], <i32 7, i32 14>
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP6]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 [[TMP4]]
-; CHECK-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP5]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP14:%.*]] = load <2 x i32>, <2 x i32>* [[TMP13]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP14:%.*]] = load <2 x i32>, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP15:%.*]] = mul <2 x i32> [[TMP14]], <i32 7, i32 7>
 ; CHECK-NEXT:    [[TMP16:%.*]] = add <2 x i32> [[TMP15]], <i32 21, i32 28>
-; CHECK-NEXT:    store <2 x i32> [[TMP10]], <2 x i32>* [[TMP11]], align 4
+; CHECK-NEXT:    store <2 x i32> [[TMP10]], ptr [[TMP6]], align 4
 ; CHECK-NEXT:    [[BARRIER:%.*]] = call i32 @goo(i32 0)
-; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i32* [[TMP12]] to <2 x i32>*
-; CHECK-NEXT:    store <2 x i32> [[TMP16]], <2 x i32>* [[TMP17]], align 4
+; CHECK-NEXT:    store <2 x i32> [[TMP16]], ptr [[TMP12]], align 4
 ; CHECK-NEXT:    [[TMP18]] = add i64 [[I_019]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[TMP18]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]]
@@ -103,17 +97,17 @@ define i32 @unrollable(i32* %in, i32* %out, i64 %n) nounwind ssp uwtable {
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %i.019 = phi i64 [ %26, %.lr.ph ], [ 0, %0 ]
   %2 = shl i64 %i.019, 2
-  %3 = getelementptr inbounds i32, i32* %in, i64 %2
-  %4 = load i32, i32* %3, align 4
+  %3 = getelementptr inbounds i32, ptr %in, i64 %2
+  %4 = load i32, ptr %3, align 4
   %5 = or i64 %2, 1
-  %6 = getelementptr inbounds i32, i32* %in, i64 %5
-  %7 = load i32, i32* %6, align 4
+  %6 = getelementptr inbounds i32, ptr %in, i64 %5
+  %7 = load i32, ptr %6, align 4
   %8 = or i64 %2, 2
-  %9 = getelementptr inbounds i32, i32* %in, i64 %8
-  %10 = load i32, i32* %9, align 4
+  %9 = getelementptr inbounds i32, ptr %in, i64 %8
+  %10 = load i32, ptr %9, align 4
   %11 = or i64 %2, 3
-  %12 = getelementptr inbounds i32, i32* %in, i64 %11
-  %13 = load i32, i32* %12, align 4
+  %12 = getelementptr inbounds i32, ptr %in, i64 %11
+  %13 = load i32, ptr %12, align 4
   %14 = mul i32 %4, 7
   %15 = add i32 %14, 7
   %16 = mul i32 %7, 7
@@ -122,15 +116,15 @@ define i32 @unrollable(i32* %in, i32* %out, i64 %n) nounwind ssp uwtable {
   %19 = add i32 %18, 21
   %20 = mul i32 %13, 7
   %21 = add i32 %20, 28
-  %22 = getelementptr inbounds i32, i32* %out, i64 %2
-  store i32 %15, i32* %22, align 4
-  %23 = getelementptr inbounds i32, i32* %out, i64 %5
-  store i32 %17, i32* %23, align 4
+  %22 = getelementptr inbounds i32, ptr %out, i64 %2
+  store i32 %15, ptr %22, align 4
+  %23 = getelementptr inbounds i32, ptr %out, i64 %5
+  store i32 %17, ptr %23, align 4
   %barrier = call i32 @goo(i32 0)                      ; <---------------- memory barrier.
-  %24 = getelementptr inbounds i32, i32* %out, i64 %8
-  store i32 %19, i32* %24, align 4
-  %25 = getelementptr inbounds i32, i32* %out, i64 %11
-  store i32 %21, i32* %25, align 4
+  %24 = getelementptr inbounds i32, ptr %out, i64 %8
+  store i32 %19, ptr %24, align 4
+  %25 = getelementptr inbounds i32, ptr %out, i64 %11
+  store i32 %21, ptr %25, align 4
   %26 = add i64 %i.019, 1
   %exitcond = icmp eq i64 %26, %n
   br i1 %exitcond, label %._crit_edge, label %.lr.ph

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/simplebb.ll b/llvm/test/Transforms/SLPVectorizer/X86/simplebb.ll
index cc8c0c920a71b..b64d91a170b2c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/simplebb.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/simplebb.ll
@@ -5,119 +5,109 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-macosx10.8.0"
 
 ; Simple 3-pair chain with loads and stores
-define void @test1(double* %a, double* %b, double* %c) {
+define void @test1(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @test1(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[C:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[C:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %i0 = load double, double* %a, align 8
-  %i1 = load double, double* %b, align 8
+  %i0 = load double, ptr %a, align 8
+  %i1 = load double, ptr %b, align 8
   %mul = fmul double %i0, %i1
-  %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double, double* %arrayidx3, align 8
-  %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double, double* %arrayidx4, align 8
+  %arrayidx3 = getelementptr inbounds double, ptr %a, i64 1
+  %i3 = load double, ptr %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double, ptr %b, i64 1
+  %i4 = load double, ptr %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
-  store double %mul, double* %c, align 8
-  %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
-  store double %mul5, double* %arrayidx5, align 8
+  store double %mul, ptr %c, align 8
+  %arrayidx5 = getelementptr inbounds double, ptr %c, i64 1
+  store double %mul5, ptr %arrayidx5, align 8
   ret void
 }
 
 ; Simple 3-pair chain with loads and stores, obfuscated with bitcasts
-define void @test2(double* %a, double* %b, i8* %e) {
+define void @test2(ptr %a, ptr %b, ptr %e) {
 ; CHECK-LABEL: @test2(
-; CHECK-NEXT:    [[C:%.*]] = bitcast i8* [[E:%.*]] to double*
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[C]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[E:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %i0 = load double, double* %a, align 8
-  %i1 = load double, double* %b, align 8
+  %i0 = load double, ptr %a, align 8
+  %i1 = load double, ptr %b, align 8
   %mul = fmul double %i0, %i1
-  %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double, double* %arrayidx3, align 8
-  %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double, double* %arrayidx4, align 8
+  %arrayidx3 = getelementptr inbounds double, ptr %a, i64 1
+  %i3 = load double, ptr %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double, ptr %b, i64 1
+  %i4 = load double, ptr %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
-  %c = bitcast i8* %e to double*
-  store double %mul, double* %c, align 8
-  %carrayidx5 = getelementptr inbounds i8, i8* %e, i64 8
-  %arrayidx5 = bitcast i8* %carrayidx5 to double*
-  store double %mul5, double* %arrayidx5, align 8
+  store double %mul, ptr %e, align 8
+  %carrayidx5 = getelementptr inbounds i8, ptr %e, i64 8
+  store double %mul5, ptr %carrayidx5, align 8
   ret void
 }
 
 ; Don't vectorize volatile loads.
-define void @test_volatile_load(double* %a, double* %b, double* %c) {
+define void @test_volatile_load(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @test_volatile_load(
-; CHECK-NEXT:    [[I0:%.*]] = load volatile double, double* [[A:%.*]], align 8
-; CHECK-NEXT:    [[I1:%.*]] = load volatile double, double* [[B:%.*]], align 8
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
-; CHECK-NEXT:    [[I3:%.*]] = load double, double* [[ARRAYIDX3]], align 8
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
-; CHECK-NEXT:    [[I4:%.*]] = load double, double* [[ARRAYIDX4]], align 8
+; CHECK-NEXT:    [[I0:%.*]] = load volatile double, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[I1:%.*]] = load volatile double, ptr [[B:%.*]], align 8
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[A]], i64 1
+; CHECK-NEXT:    [[I3:%.*]] = load double, ptr [[ARRAYIDX3]], align 8
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds double, ptr [[B]], i64 1
+; CHECK-NEXT:    [[I4:%.*]] = load double, ptr [[ARRAYIDX4]], align 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> poison, double [[I0]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[I3]], i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[I1]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[I4]], i32 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[C:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[C:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %i0 = load volatile double, double* %a, align 8
-  %i1 = load volatile double, double* %b, align 8
+  %i0 = load volatile double, ptr %a, align 8
+  %i1 = load volatile double, ptr %b, align 8
   %mul = fmul double %i0, %i1
-  %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double, double* %arrayidx3, align 8
-  %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double, double* %arrayidx4, align 8
+  %arrayidx3 = getelementptr inbounds double, ptr %a, i64 1
+  %i3 = load double, ptr %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double, ptr %b, i64 1
+  %i4 = load double, ptr %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
-  store double %mul, double* %c, align 8
-  %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
-  store double %mul5, double* %arrayidx5, align 8
+  store double %mul, ptr %c, align 8
+  %arrayidx5 = getelementptr inbounds double, ptr %c, i64 1
+  store double %mul5, ptr %arrayidx5, align 8
   ret void
 }
 
 ; Don't vectorize volatile stores.
-define void @test_volatile_store(double* %a, double* %b, double* %c) {
+define void @test_volatile_store(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @test_volatile_store(
-; CHECK-NEXT:    [[I0:%.*]] = load double, double* [[A:%.*]], align 8
-; CHECK-NEXT:    [[I1:%.*]] = load double, double* [[B:%.*]], align 8
+; CHECK-NEXT:    [[I0:%.*]] = load double, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[I1:%.*]] = load double, ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[I0]], [[I1]]
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
-; CHECK-NEXT:    [[I3:%.*]] = load double, double* [[ARRAYIDX3]], align 8
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
-; CHECK-NEXT:    [[I4:%.*]] = load double, double* [[ARRAYIDX4]], align 8
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[A]], i64 1
+; CHECK-NEXT:    [[I3:%.*]] = load double, ptr [[ARRAYIDX3]], align 8
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds double, ptr [[B]], i64 1
+; CHECK-NEXT:    [[I4:%.*]] = load double, ptr [[ARRAYIDX4]], align 8
 ; CHECK-NEXT:    [[MUL5:%.*]] = fmul double [[I3]], [[I4]]
-; CHECK-NEXT:    store volatile double [[MUL]], double* [[C:%.*]], align 8
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[C]], i64 1
-; CHECK-NEXT:    store volatile double [[MUL5]], double* [[ARRAYIDX5]], align 8
+; CHECK-NEXT:    store volatile double [[MUL]], ptr [[C:%.*]], align 8
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[C]], i64 1
+; CHECK-NEXT:    store volatile double [[MUL5]], ptr [[ARRAYIDX5]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %i0 = load double, double* %a, align 8
-  %i1 = load double, double* %b, align 8
+  %i0 = load double, ptr %a, align 8
+  %i1 = load double, ptr %b, align 8
   %mul = fmul double %i0, %i1
-  %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double, double* %arrayidx3, align 8
-  %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double, double* %arrayidx4, align 8
+  %arrayidx3 = getelementptr inbounds double, ptr %a, i64 1
+  %i3 = load double, ptr %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double, ptr %b, i64 1
+  %i4 = load double, ptr %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
-  store volatile double %mul, double* %c, align 8
-  %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
-  store volatile double %mul5, double* %arrayidx5, align 8
+  store volatile double %mul, ptr %c, align 8
+  %arrayidx5 = getelementptr inbounds double, ptr %c, i64 1
+  store volatile double %mul5, ptr %arrayidx5, align 8
   ret void
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sin-sqrt.ll b/llvm/test/Transforms/SLPVectorizer/X86/sin-sqrt.ll
index 12d2f6a541850..7535355ba1715 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/sin-sqrt.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/sin-sqrt.ll
@@ -9,14 +9,14 @@ declare double @llvm.sin.f64(double)
 
 define void @test() {
 ; CHECK-LABEL: @test(
-; CHECK-NEXT:    [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src, i32 0, i64 0), align 8
-; CHECK-NEXT:    [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src, i32 0, i64 1), align 8
-; CHECK-NEXT:    [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src, i32 0, i64 2), align 8
-; CHECK-NEXT:    [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src, i32 0, i64 3), align 8
-; CHECK-NEXT:    [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src, i32 0, i64 4), align 8
-; CHECK-NEXT:    [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src, i32 0, i64 5), align 8
-; CHECK-NEXT:    [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src, i32 0, i64 6), align 8
-; CHECK-NEXT:    [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src, i32 0, i64 7), align 8
+; CHECK-NEXT:    [[A0:%.*]] = load double, ptr @src, align 8
+; CHECK-NEXT:    [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 1), align 8
+; CHECK-NEXT:    [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 2), align 8
+; CHECK-NEXT:    [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 3), align 8
+; CHECK-NEXT:    [[A4:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 4), align 8
+; CHECK-NEXT:    [[A5:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 5), align 8
+; CHECK-NEXT:    [[A6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 6), align 8
+; CHECK-NEXT:    [[A7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 7), align 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A2]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[A6]], i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = call fast <2 x double> @llvm.sin.v2f64(<2 x double> [[TMP2]])
@@ -32,17 +32,17 @@ define void @test() {
 ; CHECK-NEXT:    [[TMP13:%.*]] = fadd fast <2 x double> [[TMP9]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP12]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = fadd fast <2 x double> [[TMP13]], [[TMP14]]
-; CHECK-NEXT:    store <2 x double> [[TMP15]], <2 x double>* bitcast ([8 x double]* @dst to <2 x double>*), align 8
+; CHECK-NEXT:    store <2 x double> [[TMP15]], ptr @dst, align 8
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src, i32 0, i64 1), align 8
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src, i32 0, i64 2), align 8
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src, i32 0, i64 3), align 8
-  %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src, i32 0, i64 4), align 8
-  %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src, i32 0, i64 5), align 8
-  %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src, i32 0, i64 6), align 8
-  %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src, i32 0, i64 7), align 8
+  %a0 = load double, ptr @src, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 1), align 8
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 2), align 8
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 3), align 8
+  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 4), align 8
+  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 5), align 8
+  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 6), align 8
+  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 7), align 8
   %sin0 = call fast double @llvm.sin.f64(double %a2)
   %sin1 = call fast double @llvm.sin.f64(double %a3)
   %sqrt0 = call fast double @llvm.sqrt.f64(double %a0)
@@ -57,7 +57,7 @@ define void @test() {
   %res3 = fadd fast double %sqrt2, %sin3
   %res4 = fadd fast double %sin2, %sqrt3
   %res01 = fadd fast double %res3, %res4
-  store double %res00, double* getelementptr inbounds ([8 x double], [8 x double]* @dst, i32 0, i64 0), align 8
-  store double %res01, double* getelementptr inbounds ([8 x double], [8 x double]* @dst, i32 0, i64 1), align 8
+  store double %res00, ptr @dst, align 8
+  store double %res01, ptr getelementptr inbounds ([8 x double], ptr @dst, i32 0, i64 1), align 8
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll
index 7d42a32983476..c4b2a9c33a014 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll
@@ -22,112 +22,112 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 define void @sitofp_2i64_2f64() #0 {
 ; SSE-LABEL: @sitofp_2i64_2f64(
-; SSE-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; SSE-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
+; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
 ; SSE-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
 ; SSE-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
-; SSE-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; SSE-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE-NEXT:    store double [[CVT0]], ptr @dst64, align 64
+; SSE-NEXT:    store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX256NODQ-LABEL: @sitofp_2i64_2f64(
-; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
+; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
 ; AVX256NODQ-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
 ; AVX256NODQ-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
-; AVX256NODQ-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; AVX256NODQ-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; AVX256NODQ-NEXT:    store double [[CVT0]], ptr @dst64, align 64
+; AVX256NODQ-NEXT:    store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
 ; AVX256NODQ-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sitofp_2i64_2f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x double>
-; AVX512-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; AVX512-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX512-NEXT:    ret void
 ;
 ; AVX256DQ-LABEL: @sitofp_2i64_2f64(
-; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
+; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
 ; AVX256DQ-NEXT:    [[TMP2:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x double>
-; AVX256DQ-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; AVX256DQ-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX256DQ-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+  %ld0 = load i64, ptr @src64, align 64
+  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
   %cvt0 = sitofp i64 %ld0 to double
   %cvt1 = sitofp i64 %ld1 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @sitofp_4i64_4f64() #0 {
 ; SSE-LABEL: @sitofp_4i64_4f64(
-; SSE-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; SSE-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-; SSE-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-; SSE-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
+; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
+; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
+; SSE-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
 ; SSE-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
 ; SSE-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
 ; SSE-NEXT:    [[CVT2:%.*]] = sitofp i64 [[LD2]] to double
 ; SSE-NEXT:    [[CVT3:%.*]] = sitofp i64 [[LD3]] to double
-; SSE-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; SSE-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; SSE-NEXT:    store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-; SSE-NEXT:    store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; SSE-NEXT:    store double [[CVT0]], ptr @dst64, align 64
+; SSE-NEXT:    store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; SSE-NEXT:    store double [[CVT2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+; SSE-NEXT:    store double [[CVT3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX256NODQ-LABEL: @sitofp_4i64_4f64(
-; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-; AVX256NODQ-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-; AVX256NODQ-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
+; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
+; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
+; AVX256NODQ-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
+; AVX256NODQ-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
 ; AVX256NODQ-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
 ; AVX256NODQ-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
 ; AVX256NODQ-NEXT:    [[CVT2:%.*]] = sitofp i64 [[LD2]] to double
 ; AVX256NODQ-NEXT:    [[CVT3:%.*]] = sitofp i64 [[LD3]] to double
-; AVX256NODQ-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; AVX256NODQ-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; AVX256NODQ-NEXT:    store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-; AVX256NODQ-NEXT:    store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; AVX256NODQ-NEXT:    store double [[CVT0]], ptr @dst64, align 64
+; AVX256NODQ-NEXT:    store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; AVX256NODQ-NEXT:    store double [[CVT2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+; AVX256NODQ-NEXT:    store double [[CVT3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
 ; AVX256NODQ-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sitofp_4i64_4f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x double>
-; AVX512-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
+; AVX512-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX512-NEXT:    ret void
 ;
 ; AVX256DQ-LABEL: @sitofp_4i64_4f64(
-; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
+; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
 ; AVX256DQ-NEXT:    [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x double>
-; AVX256DQ-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
+; AVX256DQ-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX256DQ-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-  %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-  %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
+  %ld0 = load i64, ptr @src64, align 64
+  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
+  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
+  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
   %cvt0 = sitofp i64 %ld0 to double
   %cvt1 = sitofp i64 %ld1 to double
   %cvt2 = sitofp i64 %ld2 to double
   %cvt3 = sitofp i64 %ld3 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
   ret void
 }
 
 define void @sitofp_8i64_8f64() #0 {
 ; SSE-LABEL: @sitofp_8i64_8f64(
-; SSE-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; SSE-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-; SSE-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-; SSE-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
-; SSE-NEXT:    [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
-; SSE-NEXT:    [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
-; SSE-NEXT:    [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
-; SSE-NEXT:    [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
+; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
+; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
+; SSE-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
+; SSE-NEXT:    [[LD4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
+; SSE-NEXT:    [[LD5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 5), align 8
+; SSE-NEXT:    [[LD6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
+; SSE-NEXT:    [[LD7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 7), align 8
 ; SSE-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
 ; SSE-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
 ; SSE-NEXT:    [[CVT2:%.*]] = sitofp i64 [[LD2]] to double
@@ -136,25 +136,25 @@ define void @sitofp_8i64_8f64() #0 {
 ; SSE-NEXT:    [[CVT5:%.*]] = sitofp i64 [[LD5]] to double
 ; SSE-NEXT:    [[CVT6:%.*]] = sitofp i64 [[LD6]] to double
 ; SSE-NEXT:    [[CVT7:%.*]] = sitofp i64 [[LD7]] to double
-; SSE-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; SSE-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; SSE-NEXT:    store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-; SSE-NEXT:    store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-; SSE-NEXT:    store double [[CVT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
-; SSE-NEXT:    store double [[CVT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-; SSE-NEXT:    store double [[CVT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
-; SSE-NEXT:    store double [[CVT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+; SSE-NEXT:    store double [[CVT0]], ptr @dst64, align 64
+; SSE-NEXT:    store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; SSE-NEXT:    store double [[CVT2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+; SSE-NEXT:    store double [[CVT3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+; SSE-NEXT:    store double [[CVT4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+; SSE-NEXT:    store double [[CVT5]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+; SSE-NEXT:    store double [[CVT6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
+; SSE-NEXT:    store double [[CVT7]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX256NODQ-LABEL: @sitofp_8i64_8f64(
-; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-; AVX256NODQ-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-; AVX256NODQ-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
-; AVX256NODQ-NEXT:    [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
-; AVX256NODQ-NEXT:    [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
-; AVX256NODQ-NEXT:    [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
-; AVX256NODQ-NEXT:    [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
+; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
+; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
+; AVX256NODQ-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
+; AVX256NODQ-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
+; AVX256NODQ-NEXT:    [[LD4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
+; AVX256NODQ-NEXT:    [[LD5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 5), align 8
+; AVX256NODQ-NEXT:    [[LD6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
+; AVX256NODQ-NEXT:    [[LD7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 7), align 8
 ; AVX256NODQ-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
 ; AVX256NODQ-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
 ; AVX256NODQ-NEXT:    [[CVT2:%.*]] = sitofp i64 [[LD2]] to double
@@ -163,39 +163,39 @@ define void @sitofp_8i64_8f64() #0 {
 ; AVX256NODQ-NEXT:    [[CVT5:%.*]] = sitofp i64 [[LD5]] to double
 ; AVX256NODQ-NEXT:    [[CVT6:%.*]] = sitofp i64 [[LD6]] to double
 ; AVX256NODQ-NEXT:    [[CVT7:%.*]] = sitofp i64 [[LD7]] to double
-; AVX256NODQ-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; AVX256NODQ-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; AVX256NODQ-NEXT:    store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-; AVX256NODQ-NEXT:    store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-; AVX256NODQ-NEXT:    store double [[CVT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
-; AVX256NODQ-NEXT:    store double [[CVT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-; AVX256NODQ-NEXT:    store double [[CVT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
-; AVX256NODQ-NEXT:    store double [[CVT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+; AVX256NODQ-NEXT:    store double [[CVT0]], ptr @dst64, align 64
+; AVX256NODQ-NEXT:    store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; AVX256NODQ-NEXT:    store double [[CVT2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+; AVX256NODQ-NEXT:    store double [[CVT3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+; AVX256NODQ-NEXT:    store double [[CVT4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+; AVX256NODQ-NEXT:    store double [[CVT5]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+; AVX256NODQ-NEXT:    store double [[CVT6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
+; AVX256NODQ-NEXT:    store double [[CVT7]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
 ; AVX256NODQ-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sitofp_8i64_8f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @src64, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x double>
-; AVX512-NEXT:    store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
+; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX512-NEXT:    ret void
 ;
 ; AVX256DQ-LABEL: @sitofp_8i64_8f64(
-; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
+; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
 ; AVX256DQ-NEXT:    [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x double>
-; AVX256DQ-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
-; AVX256DQ-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4) to <4 x i64>*), align 32
+; AVX256DQ-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
+; AVX256DQ-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
 ; AVX256DQ-NEXT:    [[TMP4:%.*]] = sitofp <4 x i64> [[TMP3]] to <4 x double>
-; AVX256DQ-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
+; AVX256DQ-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
 ; AVX256DQ-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-  %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-  %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
-  %ld4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
-  %ld5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
-  %ld6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
-  %ld7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
+  %ld0 = load i64, ptr @src64, align 64
+  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
+  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
+  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
+  %ld4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
+  %ld5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 5), align 8
+  %ld6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
+  %ld7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 7), align 8
   %cvt0 = sitofp i64 %ld0 to double
   %cvt1 = sitofp i64 %ld1 to double
   %cvt2 = sitofp i64 %ld2 to double
@@ -204,103 +204,103 @@ define void @sitofp_8i64_8f64() #0 {
   %cvt5 = sitofp i64 %ld5 to double
   %cvt6 = sitofp i64 %ld6 to double
   %cvt7 = sitofp i64 %ld7 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-  store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
-  store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-  store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
-  store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
+  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @sitofp_2i32_2f64() #0 {
 ; CHECK-LABEL: @sitofp_2i32_2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
 ; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
-; CHECK-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
-  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
+  %ld0 = load i32, ptr @src32, align 64
+  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
   %cvt0 = sitofp i32 %ld0 to double
   %cvt1 = sitofp i32 %ld1 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @sitofp_4i32_4f64() #0 {
 ; SSE-LABEL: @sitofp_4i32_4f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2) to <2 x i32>*), align 8
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <2 x i32> [[TMP3]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @sitofp_4i32_4f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
 ; AVX-NEXT:    [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x double>
-; AVX-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
+; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
-  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
-  %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
-  %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
+  %ld0 = load i32, ptr @src32, align 64
+  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
+  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
   %cvt0 = sitofp i32 %ld0 to double
   %cvt1 = sitofp i32 %ld1 to double
   %cvt2 = sitofp i32 %ld2 to double
   %cvt3 = sitofp i32 %ld3 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
   ret void
 }
 
 define void @sitofp_8i32_8f64() #0 {
 ; SSE-LABEL: @sitofp_8i32_8f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2) to <2 x i32>*), align 8
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <2 x i32> [[TMP3]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <2 x i32>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
 ; SSE-NEXT:    [[TMP6:%.*]] = sitofp <2 x i32> [[TMP5]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 32
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6) to <2 x i32>*), align 8
+; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6), align 8
 ; SSE-NEXT:    [[TMP8:%.*]] = sitofp <2 x i32> [[TMP7]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @sitofp_8i32_8f64(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
+; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
 ; AVX256-NEXT:    [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x double>
-; AVX256-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
-; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 16
+; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
+; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
 ; AVX256-NEXT:    [[TMP4:%.*]] = sitofp <4 x i32> [[TMP3]] to <4 x double>
-; AVX256-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
+; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sitofp_8i32_8f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @src32 to <8 x i32>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <8 x i32> [[TMP1]] to <8 x double>
-; AVX512-NEXT:    store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
+; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
-  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
-  %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
-  %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
-  %ld4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4), align 16
-  %ld5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5), align 4
-  %ld6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6), align 8
-  %ld7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7), align 4
+  %ld0 = load i32, ptr @src32, align 64
+  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
+  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
+  %ld4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
+  %ld5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 5), align 4
+  %ld6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6), align 8
+  %ld7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 7), align 4
   %cvt0 = sitofp i32 %ld0 to double
   %cvt1 = sitofp i32 %ld1 to double
   %cvt2 = sitofp i32 %ld2 to double
@@ -309,103 +309,103 @@ define void @sitofp_8i32_8f64() #0 {
   %cvt5 = sitofp i32 %ld5 to double
   %cvt6 = sitofp i32 %ld6 to double
   %cvt7 = sitofp i32 %ld7 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-  store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
-  store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-  store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
-  store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
+  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @sitofp_2i16_2f64() #0 {
 ; CHECK-LABEL: @sitofp_2i16_2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* bitcast ([32 x i16]* @src16 to <2 x i16>*), align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr @src16, align 64
 ; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <2 x i16> [[TMP1]] to <2 x double>
-; CHECK-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
-  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
+  %ld0 = load i16, ptr @src16, align 64
+  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
   %cvt0 = sitofp i16 %ld0 to double
   %cvt1 = sitofp i16 %ld1 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @sitofp_4i16_4f64() #0 {
 ; SSE-LABEL: @sitofp_4i16_4f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* bitcast ([32 x i16]* @src16 to <2 x i16>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr @src16, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <2 x i16> [[TMP1]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i16>, <2 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2) to <2 x i16>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <2 x i16> [[TMP3]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @sitofp_4i16_4f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
 ; AVX-NEXT:    [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x double>
-; AVX-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
+; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
-  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
-  %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
-  %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
+  %ld0 = load i16, ptr @src16, align 64
+  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
+  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
+  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
   %cvt0 = sitofp i16 %ld0 to double
   %cvt1 = sitofp i16 %ld1 to double
   %cvt2 = sitofp i16 %ld2 to double
   %cvt3 = sitofp i16 %ld3 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
   ret void
 }
 
 define void @sitofp_8i16_8f64() #0 {
 ; SSE-LABEL: @sitofp_8i16_8f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* bitcast ([32 x i16]* @src16 to <2 x i16>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr @src16, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <2 x i16> [[TMP1]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i16>, <2 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2) to <2 x i16>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <2 x i16> [[TMP3]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i16>, <2 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <2 x i16>*), align 8
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
 ; SSE-NEXT:    [[TMP6:%.*]] = sitofp <2 x i16> [[TMP5]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 32
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i16>, <2 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6) to <2 x i16>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6), align 4
 ; SSE-NEXT:    [[TMP8:%.*]] = sitofp <2 x i16> [[TMP7]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @sitofp_8i16_8f64(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
+; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
 ; AVX256-NEXT:    [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x double>
-; AVX256-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
-; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8
+; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
+; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
 ; AVX256-NEXT:    [[TMP4:%.*]] = sitofp <4 x i16> [[TMP3]] to <4 x double>
-; AVX256-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
+; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sitofp_8i16_8f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @src16 to <8 x i16>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <8 x i16> [[TMP1]] to <8 x double>
-; AVX512-NEXT:    store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
+; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
-  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
-  %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
-  %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
-  %ld4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
-  %ld5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
-  %ld6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
-  %ld7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
+  %ld0 = load i16, ptr @src16, align 64
+  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
+  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
+  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
+  %ld4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
+  %ld5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 5), align 2
+  %ld6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6), align 4
+  %ld7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 7), align 2
   %cvt0 = sitofp i16 %ld0 to double
   %cvt1 = sitofp i16 %ld1 to double
   %cvt2 = sitofp i16 %ld2 to double
@@ -414,103 +414,103 @@ define void @sitofp_8i16_8f64() #0 {
   %cvt5 = sitofp i16 %ld5 to double
   %cvt6 = sitofp i16 %ld6 to double
   %cvt7 = sitofp i16 %ld7 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-  store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
-  store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-  store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
-  store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
+  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @sitofp_2i8_2f64() #0 {
 ; CHECK-LABEL: @sitofp_2i8_2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr @src8, align 64
 ; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <2 x i8> [[TMP1]] to <2 x double>
-; CHECK-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
-  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
+  %ld0 = load i8, ptr @src8, align 64
+  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
   %cvt0 = sitofp i8 %ld0 to double
   %cvt1 = sitofp i8 %ld1 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @sitofp_4i8_4f64() #0 {
 ; SSE-LABEL: @sitofp_4i8_4f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr @src8, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <2 x i8> [[TMP1]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2) to <2 x i8>*), align 2
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <2 x i8> [[TMP3]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @sitofp_4i8_4f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
 ; AVX-NEXT:    [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x double>
-; AVX-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
+; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
-  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
-  %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
-  %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
+  %ld0 = load i8, ptr @src8, align 64
+  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
+  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
+  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
   %cvt0 = sitofp i8 %ld0 to double
   %cvt1 = sitofp i8 %ld1 to double
   %cvt2 = sitofp i8 %ld2 to double
   %cvt3 = sitofp i8 %ld3 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
   ret void
 }
 
 define void @sitofp_8i8_8f64() #0 {
 ; SSE-LABEL: @sitofp_8i8_8f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr @src8, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <2 x i8> [[TMP1]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2) to <2 x i8>*), align 2
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <2 x i8> [[TMP3]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <2 x i8>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = sitofp <2 x i8> [[TMP5]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 32
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6) to <2 x i8>*), align 2
+; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6), align 2
 ; SSE-NEXT:    [[TMP8:%.*]] = sitofp <2 x i8> [[TMP7]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @sitofp_8i8_8f64(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
+; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
 ; AVX256-NEXT:    [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x double>
-; AVX256-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
-; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 4
+; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
+; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
 ; AVX256-NEXT:    [[TMP4:%.*]] = sitofp <4 x i8> [[TMP3]] to <4 x double>
-; AVX256-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
+; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sitofp_8i8_8f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([64 x i8]* @src8 to <8 x i8>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @src8, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <8 x i8> [[TMP1]] to <8 x double>
-; AVX512-NEXT:    store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
+; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
-  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
-  %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
-  %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
-  %ld4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4), align 4
-  %ld5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5), align 1
-  %ld6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6), align 2
-  %ld7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7), align 1
+  %ld0 = load i8, ptr @src8, align 64
+  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
+  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
+  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
+  %ld4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
+  %ld5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 5), align 1
+  %ld6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6), align 2
+  %ld7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 7), align 1
   %cvt0 = sitofp i8 %ld0 to double
   %cvt1 = sitofp i8 %ld1 to double
   %cvt2 = sitofp i8 %ld2 to double
@@ -519,14 +519,14 @@ define void @sitofp_8i8_8f64() #0 {
   %cvt5 = sitofp i8 %ld5 to double
   %cvt6 = sitofp i8 %ld6 to double
   %cvt7 = sitofp i8 %ld7 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-  store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
-  store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-  store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
-  store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
+  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
   ret void
 }
 
@@ -536,90 +536,90 @@ define void @sitofp_8i8_8f64() #0 {
 
 define void @sitofp_2i64_2f32() #0 {
 ; SSE-LABEL: @sitofp_2i64_2f32(
-; SSE-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; SSE-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
+; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
 ; SSE-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
 ; SSE-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
-; SSE-NEXT:    store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-; SSE-NEXT:    store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; SSE-NEXT:    store float [[CVT0]], ptr @dst32, align 64
+; SSE-NEXT:    store float [[CVT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX256NODQ-LABEL: @sitofp_2i64_2f32(
-; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
+; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
 ; AVX256NODQ-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
 ; AVX256NODQ-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
-; AVX256NODQ-NEXT:    store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-; AVX256NODQ-NEXT:    store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; AVX256NODQ-NEXT:    store float [[CVT0]], ptr @dst32, align 64
+; AVX256NODQ-NEXT:    store float [[CVT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
 ; AVX256NODQ-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sitofp_2i64_2f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x float>
-; AVX512-NEXT:    store <2 x float> [[TMP2]], <2 x float>* bitcast ([16 x float]* @dst32 to <2 x float>*), align 64
+; AVX512-NEXT:    store <2 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX512-NEXT:    ret void
 ;
 ; AVX256DQ-LABEL: @sitofp_2i64_2f32(
-; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
+; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
 ; AVX256DQ-NEXT:    [[TMP2:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x float>
-; AVX256DQ-NEXT:    store <2 x float> [[TMP2]], <2 x float>* bitcast ([16 x float]* @dst32 to <2 x float>*), align 64
+; AVX256DQ-NEXT:    store <2 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX256DQ-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+  %ld0 = load i64, ptr @src64, align 64
+  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
   %cvt0 = sitofp i64 %ld0 to float
   %cvt1 = sitofp i64 %ld1 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
   ret void
 }
 
 define void @sitofp_4i64_4f32() #0 {
 ; CHECK-LABEL: @sitofp_4i64_4f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
 ; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-  %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-  %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
+  %ld0 = load i64, ptr @src64, align 64
+  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
+  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
+  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
   %cvt0 = sitofp i64 %ld0 to float
   %cvt1 = sitofp i64 %ld1 to float
   %cvt2 = sitofp i64 %ld2 to float
   %cvt3 = sitofp i64 %ld3 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
+  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @sitofp_8i64_8f32() #0 {
 ; SSE-LABEL: @sitofp_8i64_8f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4) to <4 x i64>*), align 32
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <4 x i64> [[TMP3]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @sitofp_8i64_8f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @src64, align 64
 ; AVX-NEXT:    [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
-; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
+; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-  %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-  %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
-  %ld4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
-  %ld5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
-  %ld6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
-  %ld7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
+  %ld0 = load i64, ptr @src64, align 64
+  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
+  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
+  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
+  %ld4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
+  %ld5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 5), align 8
+  %ld6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
+  %ld7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 7), align 8
   %cvt0 = sitofp i64 %ld0 to float
   %cvt1 = sitofp i64 %ld1 to float
   %cvt2 = sitofp i64 %ld2 to float
@@ -628,63 +628,63 @@ define void @sitofp_8i64_8f32() #0 {
   %cvt5 = sitofp i64 %ld5 to float
   %cvt6 = sitofp i64 %ld6 to float
   %cvt7 = sitofp i64 %ld7 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-  store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
-  store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-  store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
-  store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
+  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
+  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
+  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @sitofp_4i32_4f32() #0 {
 ; CHECK-LABEL: @sitofp_4i32_4f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
 ; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
-  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
-  %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
-  %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
+  %ld0 = load i32, ptr @src32, align 64
+  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
+  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
   %cvt0 = sitofp i32 %ld0 to float
   %cvt1 = sitofp i32 %ld1 to float
   %cvt2 = sitofp i32 %ld2 to float
   %cvt3 = sitofp i32 %ld3 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
+  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @sitofp_8i32_8f32() #0 {
 ; SSE-LABEL: @sitofp_8i32_8f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <4 x i32> [[TMP3]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @sitofp_8i32_8f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @src32 to <8 x i32>*), align 64
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 64
 ; AVX-NEXT:    [[TMP2:%.*]] = sitofp <8 x i32> [[TMP1]] to <8 x float>
-; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
+; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
-  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
-  %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
-  %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
-  %ld4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4), align 16
-  %ld5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5), align 4
-  %ld6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6), align 8
-  %ld7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7), align 4
+  %ld0 = load i32, ptr @src32, align 64
+  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
+  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
+  %ld4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
+  %ld5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 5), align 4
+  %ld6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6), align 8
+  %ld7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 7), align 4
   %cvt0 = sitofp i32 %ld0 to float
   %cvt1 = sitofp i32 %ld1 to float
   %cvt2 = sitofp i32 %ld2 to float
@@ -693,64 +693,64 @@ define void @sitofp_8i32_8f32() #0 {
   %cvt5 = sitofp i32 %ld5 to float
   %cvt6 = sitofp i32 %ld6 to float
   %cvt7 = sitofp i32 %ld7 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-  store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
-  store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-  store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
-  store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
+  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
+  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
+  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @sitofp_16i32_16f32() #0 {
 ; SSE-LABEL: @sitofp_16i32_16f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <4 x i32> [[TMP3]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 8) to <4 x i32>*), align 32
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 8), align 32
 ; SSE-NEXT:    [[TMP6:%.*]] = sitofp <4 x i32> [[TMP5]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 32
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 12) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 12), align 16
 ; SSE-NEXT:    [[TMP8:%.*]] = sitofp <4 x i32> [[TMP7]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @sitofp_16i32_16f32(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @src32 to <8 x i32>*), align 64
+; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 64
 ; AVX256-NEXT:    [[TMP2:%.*]] = sitofp <8 x i32> [[TMP1]] to <8 x float>
-; AVX256-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
-; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 8) to <8 x i32>*), align 32
+; AVX256-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
+; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 8), align 32
 ; AVX256-NEXT:    [[TMP4:%.*]] = sitofp <8 x i32> [[TMP3]] to <8 x float>
-; AVX256-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 32
+; AVX256-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sitofp_16i32_16f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @src32 to <16 x i32>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @src32, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <16 x i32> [[TMP1]] to <16 x float>
-; AVX512-NEXT:    store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 64
+; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX512-NEXT:    ret void
 ;
-  %ld0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0 ), align 64
-  %ld1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1 ), align 4
-  %ld2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2 ), align 8
-  %ld3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3 ), align 4
-  %ld4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4 ), align 16
-  %ld5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5 ), align 4
-  %ld6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6 ), align 8
-  %ld7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7 ), align 4
-  %ld8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 8 ), align 32
-  %ld9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 9 ), align 4
-  %ld10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 10), align 8
-  %ld11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 11), align 4
-  %ld12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 12), align 16
-  %ld13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 13), align 4
-  %ld14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 14), align 8
-  %ld15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 15), align 4
+  %ld0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 0 ), align 64
+  %ld1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1 ), align 4
+  %ld2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2 ), align 8
+  %ld3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3 ), align 4
+  %ld4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4 ), align 16
+  %ld5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 5 ), align 4
+  %ld6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6 ), align 8
+  %ld7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 7 ), align 4
+  %ld8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 8 ), align 32
+  %ld9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 9 ), align 4
+  %ld10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 10), align 8
+  %ld11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 11), align 4
+  %ld12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 12), align 16
+  %ld13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 13), align 4
+  %ld14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 14), align 8
+  %ld15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 15), align 4
   %cvt0  = sitofp i32 %ld0  to float
   %cvt1  = sitofp i32 %ld1  to float
   %cvt2  = sitofp i32 %ld2  to float
@@ -767,71 +767,71 @@ define void @sitofp_16i32_16f32() #0 {
   %cvt13 = sitofp i32 %ld13 to float
   %cvt14 = sitofp i32 %ld14 to float
   %cvt15 = sitofp i32 %ld15 to float
-  store float %cvt0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 64
-  store float %cvt1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
-  store float %cvt2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 8
-  store float %cvt3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
-  store float %cvt4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 16
-  store float %cvt5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
-  store float %cvt6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 8
-  store float %cvt7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
-  store float %cvt8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 32
-  store float %cvt9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
-  store float %cvt10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
-  store float %cvt11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-  store float %cvt12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
-  store float %cvt13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-  store float %cvt14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
-  store float %cvt15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+  store float %cvt0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 64
+  store float %cvt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
+  store float %cvt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 8
+  store float %cvt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
+  store float %cvt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 16
+  store float %cvt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
+  store float %cvt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 8
+  store float %cvt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
+  store float %cvt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 32
+  store float %cvt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
+  store float %cvt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 8
+  store float %cvt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+  store float %cvt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
+  store float %cvt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+  store float %cvt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 8
+  store float %cvt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @sitofp_4i16_4f32() #0 {
 ; CHECK-LABEL: @sitofp_4i16_4f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
 ; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float>
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
-  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
-  %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
-  %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
+  %ld0 = load i16, ptr @src16, align 64
+  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
+  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
+  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
   %cvt0 = sitofp i16 %ld0 to float
   %cvt1 = sitofp i16 %ld1 to float
   %cvt2 = sitofp i16 %ld2 to float
   %cvt3 = sitofp i16 %ld3 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
+  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @sitofp_8i16_8f32() #0 {
 ; SSE-LABEL: @sitofp_8i16_8f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <4 x i16> [[TMP3]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @sitofp_8i16_8f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @src16 to <8 x i16>*), align 64
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 64
 ; AVX-NEXT:    [[TMP2:%.*]] = sitofp <8 x i16> [[TMP1]] to <8 x float>
-; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
+; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
-  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
-  %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
-  %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
-  %ld4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
-  %ld5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
-  %ld6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
-  %ld7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
+  %ld0 = load i16, ptr @src16, align 64
+  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
+  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
+  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
+  %ld4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
+  %ld5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 5), align 2
+  %ld6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6), align 4
+  %ld7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 7), align 2
   %cvt0 = sitofp i16 %ld0 to float
   %cvt1 = sitofp i16 %ld1 to float
   %cvt2 = sitofp i16 %ld2 to float
@@ -840,64 +840,64 @@ define void @sitofp_8i16_8f32() #0 {
   %cvt5 = sitofp i16 %ld5 to float
   %cvt6 = sitofp i16 %ld6 to float
   %cvt7 = sitofp i16 %ld7 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-  store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
-  store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-  store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
-  store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
+  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
+  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
+  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @sitofp_16i16_16f32() #0 {
 ; SSE-LABEL: @sitofp_16i16_16f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <4 x i16> [[TMP3]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8) to <4 x i16>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 8), align 16
 ; SSE-NEXT:    [[TMP6:%.*]] = sitofp <4 x i16> [[TMP5]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 32
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12) to <4 x i16>*), align 8
+; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 12), align 8
 ; SSE-NEXT:    [[TMP8:%.*]] = sitofp <4 x i16> [[TMP7]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @sitofp_16i16_16f32(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @src16 to <8 x i16>*), align 64
+; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 64
 ; AVX256-NEXT:    [[TMP2:%.*]] = sitofp <8 x i16> [[TMP1]] to <8 x float>
-; AVX256-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
-; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8) to <8 x i16>*), align 16
+; AVX256-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
+; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 8), align 16
 ; AVX256-NEXT:    [[TMP4:%.*]] = sitofp <8 x i16> [[TMP3]] to <8 x float>
-; AVX256-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 32
+; AVX256-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sitofp_16i16_16f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @src16 to <16 x i16>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <16 x i16> [[TMP1]] to <16 x float>
-; AVX512-NEXT:    store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 64
+; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX512-NEXT:    ret void
 ;
-  %ld0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0 ), align 64
-  %ld1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1 ), align 2
-  %ld2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2 ), align 4
-  %ld3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3 ), align 2
-  %ld4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4 ), align 8
-  %ld5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5 ), align 2
-  %ld6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6 ), align 4
-  %ld7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7 ), align 2
-  %ld8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8 ), align 16
-  %ld9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 9 ), align 2
-  %ld10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 10), align 4
-  %ld11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 11), align 2
-  %ld12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12), align 8
-  %ld13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 13), align 2
-  %ld14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 14), align 4
-  %ld15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 15), align 2
+  %ld0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 0 ), align 64
+  %ld1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1 ), align 2
+  %ld2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2 ), align 4
+  %ld3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3 ), align 2
+  %ld4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4 ), align 8
+  %ld5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 5 ), align 2
+  %ld6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6 ), align 4
+  %ld7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 7 ), align 2
+  %ld8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 8 ), align 16
+  %ld9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 9 ), align 2
+  %ld10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 10), align 4
+  %ld11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 11), align 2
+  %ld12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 12), align 8
+  %ld13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 13), align 2
+  %ld14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 14), align 4
+  %ld15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 15), align 2
   %cvt0  = sitofp i16 %ld0  to float
   %cvt1  = sitofp i16 %ld1  to float
   %cvt2  = sitofp i16 %ld2  to float
@@ -914,71 +914,71 @@ define void @sitofp_16i16_16f32() #0 {
   %cvt13 = sitofp i16 %ld13 to float
   %cvt14 = sitofp i16 %ld14 to float
   %cvt15 = sitofp i16 %ld15 to float
-  store float %cvt0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 64
-  store float %cvt1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
-  store float %cvt2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 8
-  store float %cvt3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
-  store float %cvt4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 16
-  store float %cvt5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
-  store float %cvt6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 8
-  store float %cvt7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
-  store float %cvt8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 32
-  store float %cvt9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
-  store float %cvt10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
-  store float %cvt11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-  store float %cvt12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
-  store float %cvt13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-  store float %cvt14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
-  store float %cvt15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+  store float %cvt0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 64
+  store float %cvt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
+  store float %cvt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 8
+  store float %cvt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
+  store float %cvt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 16
+  store float %cvt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
+  store float %cvt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 8
+  store float %cvt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
+  store float %cvt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 32
+  store float %cvt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
+  store float %cvt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 8
+  store float %cvt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+  store float %cvt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
+  store float %cvt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+  store float %cvt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 8
+  store float %cvt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @sitofp_4i8_4f32() #0 {
 ; CHECK-LABEL: @sitofp_4i8_4f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
 ; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x float>
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
-  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
-  %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
-  %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
+  %ld0 = load i8, ptr @src8, align 64
+  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
+  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
+  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
   %cvt0 = sitofp i8 %ld0 to float
   %cvt1 = sitofp i8 %ld1 to float
   %cvt2 = sitofp i8 %ld2 to float
   %cvt3 = sitofp i8 %ld3 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
+  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @sitofp_8i8_8f32() #0 {
 ; SSE-LABEL: @sitofp_8i8_8f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <4 x i8> [[TMP3]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @sitofp_8i8_8f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([64 x i8]* @src8 to <8 x i8>*), align 64
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @src8, align 64
 ; AVX-NEXT:    [[TMP2:%.*]] = sitofp <8 x i8> [[TMP1]] to <8 x float>
-; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
+; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
-  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
-  %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
-  %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
-  %ld4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4), align 4
-  %ld5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5), align 1
-  %ld6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6), align 2
-  %ld7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7), align 1
+  %ld0 = load i8, ptr @src8, align 64
+  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
+  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
+  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
+  %ld4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
+  %ld5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 5), align 1
+  %ld6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6), align 2
+  %ld7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 7), align 1
   %cvt0 = sitofp i8 %ld0 to float
   %cvt1 = sitofp i8 %ld1 to float
   %cvt2 = sitofp i8 %ld2 to float
@@ -987,64 +987,64 @@ define void @sitofp_8i8_8f32() #0 {
   %cvt5 = sitofp i8 %ld5 to float
   %cvt6 = sitofp i8 %ld6 to float
   %cvt7 = sitofp i8 %ld7 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-  store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
-  store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-  store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
-  store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
+  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
+  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
+  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @sitofp_16i8_16f32() #0 {
 ; SSE-LABEL: @sitofp_16i8_16f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <4 x i8> [[TMP3]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 8) to <4 x i8>*), align 8
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 8), align 8
 ; SSE-NEXT:    [[TMP6:%.*]] = sitofp <4 x i8> [[TMP5]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 32
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 12) to <4 x i8>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP8:%.*]] = sitofp <4 x i8> [[TMP7]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @sitofp_16i8_16f32(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([64 x i8]* @src8 to <8 x i8>*), align 64
+; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @src8, align 64
 ; AVX256-NEXT:    [[TMP2:%.*]] = sitofp <8 x i8> [[TMP1]] to <8 x float>
-; AVX256-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
-; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 8) to <8 x i8>*), align 8
+; AVX256-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
+; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 8), align 8
 ; AVX256-NEXT:    [[TMP4:%.*]] = sitofp <8 x i8> [[TMP3]] to <8 x float>
-; AVX256-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 32
+; AVX256-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sitofp_16i8_16f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @src8 to <16 x i8>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <16 x i8> [[TMP1]] to <16 x float>
-; AVX512-NEXT:    store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 64
+; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX512-NEXT:    ret void
 ;
-  %ld0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0 ), align 64
-  %ld1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1 ), align 1
-  %ld2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2 ), align 2
-  %ld3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3 ), align 1
-  %ld4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4 ), align 4
-  %ld5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5 ), align 1
-  %ld6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6 ), align 2
-  %ld7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7 ), align 1
-  %ld8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 8 ), align 8
-  %ld9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 9 ), align 1
-  %ld10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 10), align 2
-  %ld11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 11), align 1
-  %ld12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 12), align 4
-  %ld13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 13), align 1
-  %ld14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 14), align 2
-  %ld15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 15), align 1
+  %ld0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 0 ), align 64
+  %ld1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1 ), align 1
+  %ld2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2 ), align 2
+  %ld3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3 ), align 1
+  %ld4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4 ), align 4
+  %ld5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 5 ), align 1
+  %ld6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6 ), align 2
+  %ld7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 7 ), align 1
+  %ld8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 8 ), align 8
+  %ld9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 9 ), align 1
+  %ld10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 10), align 2
+  %ld11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 11), align 1
+  %ld12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 12), align 4
+  %ld13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 13), align 1
+  %ld14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 14), align 2
+  %ld15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 15), align 1
   %cvt0  = sitofp i8 %ld0  to float
   %cvt1  = sitofp i8 %ld1  to float
   %cvt2  = sitofp i8 %ld2  to float
@@ -1061,22 +1061,22 @@ define void @sitofp_16i8_16f32() #0 {
   %cvt13 = sitofp i8 %ld13 to float
   %cvt14 = sitofp i8 %ld14 to float
   %cvt15 = sitofp i8 %ld15 to float
-  store float %cvt0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 64
-  store float %cvt1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
-  store float %cvt2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 8
-  store float %cvt3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
-  store float %cvt4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 16
-  store float %cvt5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
-  store float %cvt6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 8
-  store float %cvt7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
-  store float %cvt8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 32
-  store float %cvt9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
-  store float %cvt10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
-  store float %cvt11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-  store float %cvt12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
-  store float %cvt13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-  store float %cvt14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
-  store float %cvt15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+  store float %cvt0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 64
+  store float %cvt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
+  store float %cvt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 8
+  store float %cvt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
+  store float %cvt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 16
+  store float %cvt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
+  store float %cvt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 8
+  store float %cvt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
+  store float %cvt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 32
+  store float %cvt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
+  store float %cvt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 8
+  store float %cvt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+  store float %cvt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
+  store float %cvt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+  store float %cvt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 8
+  store float %cvt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
   ret void
 }
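
For readers skimming the hunks, every change above follows the same mechanical pattern; a minimal before/after sketch, lifted from the sitofp_4i16_4f32 hunks in this file (no new IR is introduced, these lines appear verbatim in the diff):

  ; typed pointers (before)
  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64

  ; opaque pointers (after): the pointee type drops out of the pointer, and a
  ; zero-index constant GEP folds away to the bare global
  %ld0 = load i16, ptr @src16, align 64

The same folding explains the shrinking CHECK lines: under opaque pointers a bitcast between pointer types is a no-op, so patterns like `<4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*)` collapse to plain `ptr @src16`, while GEPs with a non-zero index keep their source element type and only have their operands rewritten to `ptr`.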
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll b/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll
index 1585cb247510d..83205cc772994 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll
@@ -22,112 +22,112 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 define void @sitofp_2i64_2f64() #0 {
 ; SSE-LABEL: @sitofp_2i64_2f64(
-; SSE-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; SSE-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
+; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
 ; SSE-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
 ; SSE-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
-; SSE-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; SSE-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE-NEXT:    store double [[CVT0]], ptr @dst64, align 64
+; SSE-NEXT:    store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX256NODQ-LABEL: @sitofp_2i64_2f64(
-; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
+; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
 ; AVX256NODQ-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
 ; AVX256NODQ-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
-; AVX256NODQ-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; AVX256NODQ-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; AVX256NODQ-NEXT:    store double [[CVT0]], ptr @dst64, align 64
+; AVX256NODQ-NEXT:    store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
 ; AVX256NODQ-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sitofp_2i64_2f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x double>
-; AVX512-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; AVX512-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX512-NEXT:    ret void
 ;
 ; AVX256DQ-LABEL: @sitofp_2i64_2f64(
-; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
+; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
 ; AVX256DQ-NEXT:    [[TMP2:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x double>
-; AVX256DQ-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; AVX256DQ-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX256DQ-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+  %ld0 = load i64, ptr @src64, align 64
+  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
   %cvt0 = sitofp i64 %ld0 to double
   %cvt1 = sitofp i64 %ld1 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @sitofp_4i64_4f64() #0 {
 ; SSE-LABEL: @sitofp_4i64_4f64(
-; SSE-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; SSE-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-; SSE-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-; SSE-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
+; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
+; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
+; SSE-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
 ; SSE-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
 ; SSE-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
 ; SSE-NEXT:    [[CVT2:%.*]] = sitofp i64 [[LD2]] to double
 ; SSE-NEXT:    [[CVT3:%.*]] = sitofp i64 [[LD3]] to double
-; SSE-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; SSE-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; SSE-NEXT:    store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-; SSE-NEXT:    store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; SSE-NEXT:    store double [[CVT0]], ptr @dst64, align 64
+; SSE-NEXT:    store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; SSE-NEXT:    store double [[CVT2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+; SSE-NEXT:    store double [[CVT3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX256NODQ-LABEL: @sitofp_4i64_4f64(
-; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-; AVX256NODQ-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-; AVX256NODQ-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
+; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
+; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
+; AVX256NODQ-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
+; AVX256NODQ-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
 ; AVX256NODQ-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
 ; AVX256NODQ-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
 ; AVX256NODQ-NEXT:    [[CVT2:%.*]] = sitofp i64 [[LD2]] to double
 ; AVX256NODQ-NEXT:    [[CVT3:%.*]] = sitofp i64 [[LD3]] to double
-; AVX256NODQ-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; AVX256NODQ-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; AVX256NODQ-NEXT:    store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-; AVX256NODQ-NEXT:    store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; AVX256NODQ-NEXT:    store double [[CVT0]], ptr @dst64, align 64
+; AVX256NODQ-NEXT:    store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; AVX256NODQ-NEXT:    store double [[CVT2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+; AVX256NODQ-NEXT:    store double [[CVT3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
 ; AVX256NODQ-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sitofp_4i64_4f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x double>
-; AVX512-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
+; AVX512-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX512-NEXT:    ret void
 ;
 ; AVX256DQ-LABEL: @sitofp_4i64_4f64(
-; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
+; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
 ; AVX256DQ-NEXT:    [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x double>
-; AVX256DQ-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
+; AVX256DQ-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX256DQ-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-  %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-  %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
+  %ld0 = load i64, ptr @src64, align 64
+  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
+  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
+  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
   %cvt0 = sitofp i64 %ld0 to double
   %cvt1 = sitofp i64 %ld1 to double
   %cvt2 = sitofp i64 %ld2 to double
   %cvt3 = sitofp i64 %ld3 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
   ret void
 }
 
 define void @sitofp_8i64_8f64() #0 {
 ; SSE-LABEL: @sitofp_8i64_8f64(
-; SSE-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; SSE-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-; SSE-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-; SSE-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
-; SSE-NEXT:    [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
-; SSE-NEXT:    [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
-; SSE-NEXT:    [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
-; SSE-NEXT:    [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
+; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
+; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
+; SSE-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
+; SSE-NEXT:    [[LD4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
+; SSE-NEXT:    [[LD5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 5), align 8
+; SSE-NEXT:    [[LD6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
+; SSE-NEXT:    [[LD7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 7), align 8
 ; SSE-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
 ; SSE-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
 ; SSE-NEXT:    [[CVT2:%.*]] = sitofp i64 [[LD2]] to double
@@ -136,25 +136,25 @@ define void @sitofp_8i64_8f64() #0 {
 ; SSE-NEXT:    [[CVT5:%.*]] = sitofp i64 [[LD5]] to double
 ; SSE-NEXT:    [[CVT6:%.*]] = sitofp i64 [[LD6]] to double
 ; SSE-NEXT:    [[CVT7:%.*]] = sitofp i64 [[LD7]] to double
-; SSE-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; SSE-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; SSE-NEXT:    store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-; SSE-NEXT:    store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-; SSE-NEXT:    store double [[CVT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
-; SSE-NEXT:    store double [[CVT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-; SSE-NEXT:    store double [[CVT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
-; SSE-NEXT:    store double [[CVT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+; SSE-NEXT:    store double [[CVT0]], ptr @dst64, align 64
+; SSE-NEXT:    store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; SSE-NEXT:    store double [[CVT2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+; SSE-NEXT:    store double [[CVT3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+; SSE-NEXT:    store double [[CVT4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+; SSE-NEXT:    store double [[CVT5]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+; SSE-NEXT:    store double [[CVT6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
+; SSE-NEXT:    store double [[CVT7]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX256NODQ-LABEL: @sitofp_8i64_8f64(
-; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-; AVX256NODQ-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-; AVX256NODQ-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
-; AVX256NODQ-NEXT:    [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
-; AVX256NODQ-NEXT:    [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
-; AVX256NODQ-NEXT:    [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
-; AVX256NODQ-NEXT:    [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
+; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
+; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
+; AVX256NODQ-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
+; AVX256NODQ-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
+; AVX256NODQ-NEXT:    [[LD4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
+; AVX256NODQ-NEXT:    [[LD5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 5), align 8
+; AVX256NODQ-NEXT:    [[LD6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
+; AVX256NODQ-NEXT:    [[LD7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 7), align 8
 ; AVX256NODQ-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
 ; AVX256NODQ-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
 ; AVX256NODQ-NEXT:    [[CVT2:%.*]] = sitofp i64 [[LD2]] to double
@@ -163,39 +163,39 @@ define void @sitofp_8i64_8f64() #0 {
 ; AVX256NODQ-NEXT:    [[CVT5:%.*]] = sitofp i64 [[LD5]] to double
 ; AVX256NODQ-NEXT:    [[CVT6:%.*]] = sitofp i64 [[LD6]] to double
 ; AVX256NODQ-NEXT:    [[CVT7:%.*]] = sitofp i64 [[LD7]] to double
-; AVX256NODQ-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; AVX256NODQ-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; AVX256NODQ-NEXT:    store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-; AVX256NODQ-NEXT:    store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-; AVX256NODQ-NEXT:    store double [[CVT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
-; AVX256NODQ-NEXT:    store double [[CVT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-; AVX256NODQ-NEXT:    store double [[CVT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
-; AVX256NODQ-NEXT:    store double [[CVT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+; AVX256NODQ-NEXT:    store double [[CVT0]], ptr @dst64, align 64
+; AVX256NODQ-NEXT:    store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; AVX256NODQ-NEXT:    store double [[CVT2]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+; AVX256NODQ-NEXT:    store double [[CVT3]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+; AVX256NODQ-NEXT:    store double [[CVT4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+; AVX256NODQ-NEXT:    store double [[CVT5]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+; AVX256NODQ-NEXT:    store double [[CVT6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
+; AVX256NODQ-NEXT:    store double [[CVT7]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
 ; AVX256NODQ-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sitofp_8i64_8f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @src64, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x double>
-; AVX512-NEXT:    store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
+; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX512-NEXT:    ret void
 ;
 ; AVX256DQ-LABEL: @sitofp_8i64_8f64(
-; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
+; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
 ; AVX256DQ-NEXT:    [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x double>
-; AVX256DQ-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
-; AVX256DQ-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4) to <4 x i64>*), align 32
+; AVX256DQ-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
+; AVX256DQ-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
 ; AVX256DQ-NEXT:    [[TMP4:%.*]] = sitofp <4 x i64> [[TMP3]] to <4 x double>
-; AVX256DQ-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
+; AVX256DQ-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
 ; AVX256DQ-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-  %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-  %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
-  %ld4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
-  %ld5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
-  %ld6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
-  %ld7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
+  %ld0 = load i64, ptr @src64, align 64
+  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
+  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
+  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
+  %ld4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
+  %ld5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 5), align 8
+  %ld6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
+  %ld7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 7), align 8
   %cvt0 = sitofp i64 %ld0 to double
   %cvt1 = sitofp i64 %ld1 to double
   %cvt2 = sitofp i64 %ld2 to double
@@ -204,103 +204,103 @@ define void @sitofp_8i64_8f64() #0 {
   %cvt5 = sitofp i64 %ld5 to double
   %cvt6 = sitofp i64 %ld6 to double
   %cvt7 = sitofp i64 %ld7 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-  store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
-  store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-  store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
-  store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
+  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
   ret void
 }
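
(Every hunk in this file applies the same mechanical rewrite: the pointee type
moves out of the pointer operand, and an all-zero getelementptr constant folds
away to the bare global. A minimal before/after sketch of one store, using a
hypothetical global @g rather than the test's own globals:

  @g = global [8 x double] zeroinitializer

  ; typed pointers (before)
  store double %v, double* getelementptr inbounds ([8 x double], [8 x double]* @g, i32 0, i64 1), align 8

  ; opaque pointers (after): same indices, same inbounds flag, same alignment
  store double %v, ptr getelementptr inbounds ([8 x double], ptr @g, i32 0, i64 1), align 8

This is also why "store double %cvt0, ptr @dst64" above carries no
getelementptr at all: a GEP with indices (i32 0, i64 0) is a no-op and gets
folded to the global itself.)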
 
 define void @sitofp_2i32_2f64() #0 {
 ; CHECK-LABEL: @sitofp_2i32_2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
 ; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
-; CHECK-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
-  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
+  %ld0 = load i32, ptr @src32, align 64
+  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
   %cvt0 = sitofp i32 %ld0 to double
   %cvt1 = sitofp i32 %ld1 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @sitofp_4i32_4f64() #0 {
 ; SSE-LABEL: @sitofp_4i32_4f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2) to <2 x i32>*), align 8
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <2 x i32> [[TMP3]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @sitofp_4i32_4f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
 ; AVX-NEXT:    [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x double>
-; AVX-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
+; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
-  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
-  %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
-  %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
+  %ld0 = load i32, ptr @src32, align 64
+  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
+  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
   %cvt0 = sitofp i32 %ld0 to double
   %cvt1 = sitofp i32 %ld1 to double
   %cvt2 = sitofp i32 %ld2 to double
   %cvt3 = sitofp i32 %ld3 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
   ret void
 }
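
(The vectorized CHECK lines change for the same reason: opaque pointers have
no <N x T>* type to cast to, so the bitcast constant expressions disappear and
a ptr can be loaded or stored at a vector type directly. Sketch, again with a
hypothetical @g:

  ; before: a constant bitcast was needed to obtain a <4 x i32>*
  %v = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @g to <4 x i32>*), align 64

  ; after: the vector type is carried by the load instruction itself
  %v = load <4 x i32>, ptr @g, align 64

The access itself is unchanged, which is what makes the conversion NFC.)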
 
 define void @sitofp_8i32_8f64() #0 {
 ; SSE-LABEL: @sitofp_8i32_8f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2) to <2 x i32>*), align 8
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <2 x i32> [[TMP3]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <2 x i32>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
 ; SSE-NEXT:    [[TMP6:%.*]] = sitofp <2 x i32> [[TMP5]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 32
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6) to <2 x i32>*), align 8
+; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6), align 8
 ; SSE-NEXT:    [[TMP8:%.*]] = sitofp <2 x i32> [[TMP7]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @sitofp_8i32_8f64(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
+; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
 ; AVX256-NEXT:    [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x double>
-; AVX256-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
-; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 16
+; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
+; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
 ; AVX256-NEXT:    [[TMP4:%.*]] = sitofp <4 x i32> [[TMP3]] to <4 x double>
-; AVX256-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
+; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sitofp_8i32_8f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @src32 to <8 x i32>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <8 x i32> [[TMP1]] to <8 x double>
-; AVX512-NEXT:    store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
+; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
-  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
-  %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
-  %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
-  %ld4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4), align 16
-  %ld5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5), align 4
-  %ld6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6), align 8
-  %ld7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7), align 4
+  %ld0 = load i32, ptr @src32, align 64
+  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
+  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
+  %ld4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
+  %ld5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 5), align 4
+  %ld6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6), align 8
+  %ld7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 7), align 4
   %cvt0 = sitofp i32 %ld0 to double
   %cvt1 = sitofp i32 %ld1 to double
   %cvt2 = sitofp i32 %ld2 to double
@@ -309,103 +309,103 @@ define void @sitofp_8i32_8f64() #0 {
   %cvt5 = sitofp i32 %ld5 to double
   %cvt6 = sitofp i32 %ld6 to double
   %cvt7 = sitofp i32 %ld7 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-  store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
-  store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-  store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
-  store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
+  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @sitofp_2i16_2f64() #0 {
 ; CHECK-LABEL: @sitofp_2i16_2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* bitcast ([32 x i16]* @src16 to <2 x i16>*), align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr @src16, align 64
 ; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <2 x i16> [[TMP1]] to <2 x double>
-; CHECK-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
-  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
+  %ld0 = load i16, ptr @src16, align 64
+  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
   %cvt0 = sitofp i16 %ld0 to double
   %cvt1 = sitofp i16 %ld1 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @sitofp_4i16_4f64() #0 {
 ; SSE-LABEL: @sitofp_4i16_4f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* bitcast ([32 x i16]* @src16 to <2 x i16>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr @src16, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <2 x i16> [[TMP1]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i16>, <2 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2) to <2 x i16>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <2 x i16> [[TMP3]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @sitofp_4i16_4f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
 ; AVX-NEXT:    [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x double>
-; AVX-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
+; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
-  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
-  %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
-  %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
+  %ld0 = load i16, ptr @src16, align 64
+  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
+  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
+  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
   %cvt0 = sitofp i16 %ld0 to double
   %cvt1 = sitofp i16 %ld1 to double
   %cvt2 = sitofp i16 %ld2 to double
   %cvt3 = sitofp i16 %ld3 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
   ret void
 }
 
 define void @sitofp_8i16_8f64() #0 {
 ; SSE-LABEL: @sitofp_8i16_8f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* bitcast ([32 x i16]* @src16 to <2 x i16>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr @src16, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <2 x i16> [[TMP1]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i16>, <2 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2) to <2 x i16>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <2 x i16> [[TMP3]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i16>, <2 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <2 x i16>*), align 8
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
 ; SSE-NEXT:    [[TMP6:%.*]] = sitofp <2 x i16> [[TMP5]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 32
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i16>, <2 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6) to <2 x i16>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6), align 4
 ; SSE-NEXT:    [[TMP8:%.*]] = sitofp <2 x i16> [[TMP7]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @sitofp_8i16_8f64(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
+; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
 ; AVX256-NEXT:    [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x double>
-; AVX256-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
-; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8
+; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
+; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
 ; AVX256-NEXT:    [[TMP4:%.*]] = sitofp <4 x i16> [[TMP3]] to <4 x double>
-; AVX256-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
+; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sitofp_8i16_8f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @src16 to <8 x i16>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <8 x i16> [[TMP1]] to <8 x double>
-; AVX512-NEXT:    store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
+; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
-  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
-  %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
-  %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
-  %ld4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
-  %ld5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
-  %ld6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
-  %ld7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
+  %ld0 = load i16, ptr @src16, align 64
+  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
+  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
+  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
+  %ld4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
+  %ld5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 5), align 2
+  %ld6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6), align 4
+  %ld7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 7), align 2
   %cvt0 = sitofp i16 %ld0 to double
   %cvt1 = sitofp i16 %ld1 to double
   %cvt2 = sitofp i16 %ld2 to double
@@ -414,103 +414,103 @@ define void @sitofp_8i16_8f64() #0 {
   %cvt5 = sitofp i16 %ld5 to double
   %cvt6 = sitofp i16 %ld6 to double
   %cvt7 = sitofp i16 %ld7 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-  store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
-  store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-  store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
-  store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
+  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @sitofp_2i8_2f64() #0 {
 ; CHECK-LABEL: @sitofp_2i8_2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr @src8, align 64
 ; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <2 x i8> [[TMP1]] to <2 x double>
-; CHECK-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
-  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
+  %ld0 = load i8, ptr @src8, align 64
+  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
   %cvt0 = sitofp i8 %ld0 to double
   %cvt1 = sitofp i8 %ld1 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @sitofp_4i8_4f64() #0 {
 ; SSE-LABEL: @sitofp_4i8_4f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr @src8, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <2 x i8> [[TMP1]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2) to <2 x i8>*), align 2
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <2 x i8> [[TMP3]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @sitofp_4i8_4f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
 ; AVX-NEXT:    [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x double>
-; AVX-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
+; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
-  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
-  %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
-  %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
+  %ld0 = load i8, ptr @src8, align 64
+  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
+  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
+  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
   %cvt0 = sitofp i8 %ld0 to double
   %cvt1 = sitofp i8 %ld1 to double
   %cvt2 = sitofp i8 %ld2 to double
   %cvt3 = sitofp i8 %ld3 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
   ret void
 }
 
 define void @sitofp_8i8_8f64() #0 {
 ; SSE-LABEL: @sitofp_8i8_8f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr @src8, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <2 x i8> [[TMP1]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2) to <2 x i8>*), align 2
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <2 x i8> [[TMP3]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <2 x i8>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = sitofp <2 x i8> [[TMP5]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 32
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6) to <2 x i8>*), align 2
+; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6), align 2
 ; SSE-NEXT:    [[TMP8:%.*]] = sitofp <2 x i8> [[TMP7]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @sitofp_8i8_8f64(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
+; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
 ; AVX256-NEXT:    [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x double>
-; AVX256-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
-; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 4
+; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
+; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
 ; AVX256-NEXT:    [[TMP4:%.*]] = sitofp <4 x i8> [[TMP3]] to <4 x double>
-; AVX256-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
+; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sitofp_8i8_8f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([64 x i8]* @src8 to <8 x i8>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @src8, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <8 x i8> [[TMP1]] to <8 x double>
-; AVX512-NEXT:    store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
+; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
-  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
-  %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
-  %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
-  %ld4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4), align 4
-  %ld5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5), align 1
-  %ld6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6), align 2
-  %ld7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7), align 1
+  %ld0 = load i8, ptr @src8, align 64
+  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
+  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
+  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
+  %ld4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
+  %ld5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 5), align 1
+  %ld6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6), align 2
+  %ld7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 7), align 1
   %cvt0 = sitofp i8 %ld0 to double
   %cvt1 = sitofp i8 %ld1 to double
   %cvt2 = sitofp i8 %ld2 to double
@@ -519,14 +519,14 @@ define void @sitofp_8i8_8f64() #0 {
   %cvt5 = sitofp i8 %ld5 to double
   %cvt6 = sitofp i8 %ld6 to double
   %cvt7 = sitofp i8 %ld7 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-  store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
-  store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-  store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
-  store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
+  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
   ret void
 }
 
@@ -536,90 +536,90 @@ define void @sitofp_8i8_8f64() #0 {
 
 define void @sitofp_2i64_2f32() #0 {
 ; SSE-LABEL: @sitofp_2i64_2f32(
-; SSE-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; SSE-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
+; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
 ; SSE-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
 ; SSE-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
-; SSE-NEXT:    store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-; SSE-NEXT:    store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; SSE-NEXT:    store float [[CVT0]], ptr @dst32, align 64
+; SSE-NEXT:    store float [[CVT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX256NODQ-LABEL: @sitofp_2i64_2f32(
-; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
+; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
 ; AVX256NODQ-NEXT:    [[CVT0:%.*]] = sitofp i64 [[LD0]] to float
 ; AVX256NODQ-NEXT:    [[CVT1:%.*]] = sitofp i64 [[LD1]] to float
-; AVX256NODQ-NEXT:    store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-; AVX256NODQ-NEXT:    store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; AVX256NODQ-NEXT:    store float [[CVT0]], ptr @dst32, align 64
+; AVX256NODQ-NEXT:    store float [[CVT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
 ; AVX256NODQ-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sitofp_2i64_2f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x float>
-; AVX512-NEXT:    store <2 x float> [[TMP2]], <2 x float>* bitcast ([16 x float]* @dst32 to <2 x float>*), align 64
+; AVX512-NEXT:    store <2 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX512-NEXT:    ret void
 ;
 ; AVX256DQ-LABEL: @sitofp_2i64_2f32(
-; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
+; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
 ; AVX256DQ-NEXT:    [[TMP2:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x float>
-; AVX256DQ-NEXT:    store <2 x float> [[TMP2]], <2 x float>* bitcast ([16 x float]* @dst32 to <2 x float>*), align 64
+; AVX256DQ-NEXT:    store <2 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX256DQ-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+  %ld0 = load i64, ptr @src64, align 64
+  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
   %cvt0 = sitofp i64 %ld0 to float
   %cvt1 = sitofp i64 %ld1 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
   ret void
 }
 
 define void @sitofp_4i64_4f32() #0 {
 ; CHECK-LABEL: @sitofp_4i64_4f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
 ; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-  %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-  %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
+  %ld0 = load i64, ptr @src64, align 64
+  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
+  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
+  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
   %cvt0 = sitofp i64 %ld0 to float
   %cvt1 = sitofp i64 %ld1 to float
   %cvt2 = sitofp i64 %ld2 to float
   %cvt3 = sitofp i64 %ld3 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
+  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @sitofp_8i64_8f32() #0 {
 ; SSE-LABEL: @sitofp_8i64_8f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4) to <4 x i64>*), align 32
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <4 x i64> [[TMP3]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @sitofp_8i64_8f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @src64, align 64
 ; AVX-NEXT:    [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float>
-; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
+; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-  %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-  %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
-  %ld4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
-  %ld5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
-  %ld6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
-  %ld7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
+  %ld0 = load i64, ptr @src64, align 64
+  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
+  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
+  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
+  %ld4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
+  %ld5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 5), align 8
+  %ld6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
+  %ld7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 7), align 8
   %cvt0 = sitofp i64 %ld0 to float
   %cvt1 = sitofp i64 %ld1 to float
   %cvt2 = sitofp i64 %ld2 to float
@@ -628,63 +628,63 @@ define void @sitofp_8i64_8f32() #0 {
   %cvt5 = sitofp i64 %ld5 to float
   %cvt6 = sitofp i64 %ld6 to float
   %cvt7 = sitofp i64 %ld7 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-  store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
-  store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-  store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
-  store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
+  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
+  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
+  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
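
(Vector accesses at a non-zero offset, as in the SSE checks just above, keep
their getelementptr but shed the surrounding bitcast, since the GEP constant
now yields a plain ptr. In the index list (i32 0, i64 4), the leading 0 steps
through the pointer to the array and the i64 4 selects the element, i.e. a
32-byte offset into the i64 array. Sketch with a hypothetical @g:

  ; before
  %v = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @g, i32 0, i64 4) to <4 x i64>*), align 32

  ; after
  %v = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @g, i32 0, i64 4), align 32
)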
 
 define void @sitofp_4i32_4f32() #0 {
 ; CHECK-LABEL: @sitofp_4i32_4f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
 ; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
-  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
-  %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
-  %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
+  %ld0 = load i32, ptr @src32, align 64
+  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
+  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
   %cvt0 = sitofp i32 %ld0 to float
   %cvt1 = sitofp i32 %ld1 to float
   %cvt2 = sitofp i32 %ld2 to float
   %cvt3 = sitofp i32 %ld3 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
+  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @sitofp_8i32_8f32() #0 {
 ; SSE-LABEL: @sitofp_8i32_8f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <4 x i32> [[TMP3]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @sitofp_8i32_8f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @src32 to <8 x i32>*), align 64
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 64
 ; AVX-NEXT:    [[TMP2:%.*]] = sitofp <8 x i32> [[TMP1]] to <8 x float>
-; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
+; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
-  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
-  %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
-  %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
-  %ld4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4), align 16
-  %ld5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5), align 4
-  %ld6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6), align 8
-  %ld7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7), align 4
+  %ld0 = load i32, ptr @src32, align 64
+  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
+  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
+  %ld4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
+  %ld5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 5), align 4
+  %ld6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6), align 8
+  %ld7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 7), align 4
   %cvt0 = sitofp i32 %ld0 to float
   %cvt1 = sitofp i32 %ld1 to float
   %cvt2 = sitofp i32 %ld2 to float
@@ -693,64 +693,64 @@ define void @sitofp_8i32_8f32() #0 {
   %cvt5 = sitofp i32 %ld5 to float
   %cvt6 = sitofp i32 %ld6 to float
   %cvt7 = sitofp i32 %ld7 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-  store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
-  store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-  store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
-  store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
+  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
+  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
+  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @sitofp_16i32_16f32() #0 {
 ; SSE-LABEL: @sitofp_16i32_16f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <4 x i32> [[TMP3]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 8) to <4 x i32>*), align 32
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 8), align 32
 ; SSE-NEXT:    [[TMP6:%.*]] = sitofp <4 x i32> [[TMP5]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 32
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 12) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 12), align 16
 ; SSE-NEXT:    [[TMP8:%.*]] = sitofp <4 x i32> [[TMP7]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @sitofp_16i32_16f32(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @src32 to <8 x i32>*), align 64
+; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 64
 ; AVX256-NEXT:    [[TMP2:%.*]] = sitofp <8 x i32> [[TMP1]] to <8 x float>
-; AVX256-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
-; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 8) to <8 x i32>*), align 32
+; AVX256-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
+; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 8), align 32
 ; AVX256-NEXT:    [[TMP4:%.*]] = sitofp <8 x i32> [[TMP3]] to <8 x float>
-; AVX256-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 32
+; AVX256-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sitofp_16i32_16f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @src32 to <16 x i32>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @src32, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <16 x i32> [[TMP1]] to <16 x float>
-; AVX512-NEXT:    store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 64
+; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX512-NEXT:    ret void
 ;
-  %ld0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0 ), align 64
-  %ld1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1 ), align 4
-  %ld2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2 ), align 8
-  %ld3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3 ), align 4
-  %ld4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4 ), align 16
-  %ld5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5 ), align 4
-  %ld6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6 ), align 8
-  %ld7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7 ), align 4
-  %ld8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 8 ), align 32
-  %ld9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 9 ), align 4
-  %ld10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 10), align 8
-  %ld11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 11), align 4
-  %ld12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 12), align 16
-  %ld13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 13), align 4
-  %ld14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 14), align 8
-  %ld15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 15), align 4
+  %ld0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 0 ), align 64
+  %ld1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1 ), align 4
+  %ld2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2 ), align 8
+  %ld3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3 ), align 4
+  %ld4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4 ), align 16
+  %ld5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 5 ), align 4
+  %ld6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6 ), align 8
+  %ld7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 7 ), align 4
+  %ld8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 8 ), align 32
+  %ld9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 9 ), align 4
+  %ld10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 10), align 8
+  %ld11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 11), align 4
+  %ld12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 12), align 16
+  %ld13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 13), align 4
+  %ld14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 14), align 8
+  %ld15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 15), align 4
   %cvt0  = sitofp i32 %ld0  to float
   %cvt1  = sitofp i32 %ld1  to float
   %cvt2  = sitofp i32 %ld2  to float
@@ -767,71 +767,71 @@ define void @sitofp_16i32_16f32() #0 {
   %cvt13 = sitofp i32 %ld13 to float
   %cvt14 = sitofp i32 %ld14 to float
   %cvt15 = sitofp i32 %ld15 to float
-  store float %cvt0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 64
-  store float %cvt1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
-  store float %cvt2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 8
-  store float %cvt3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
-  store float %cvt4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 16
-  store float %cvt5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
-  store float %cvt6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 8
-  store float %cvt7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
-  store float %cvt8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 32
-  store float %cvt9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
-  store float %cvt10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
-  store float %cvt11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-  store float %cvt12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
-  store float %cvt13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-  store float %cvt14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
-  store float %cvt15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+  store float %cvt0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 64
+  store float %cvt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
+  store float %cvt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 8
+  store float %cvt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
+  store float %cvt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 16
+  store float %cvt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
+  store float %cvt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 8
+  store float %cvt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
+  store float %cvt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 32
+  store float %cvt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
+  store float %cvt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 8
+  store float %cvt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+  store float %cvt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
+  store float %cvt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+  store float %cvt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 8
+  store float %cvt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @sitofp_4i16_4f32() #0 {
 ; CHECK-LABEL: @sitofp_4i16_4f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
 ; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float>
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
-  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
-  %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
-  %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
+  %ld0 = load i16, ptr @src16, align 64
+  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
+  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
+  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
   %cvt0 = sitofp i16 %ld0 to float
   %cvt1 = sitofp i16 %ld1 to float
   %cvt2 = sitofp i16 %ld2 to float
   %cvt3 = sitofp i16 %ld3 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
+  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @sitofp_8i16_8f32() #0 {
 ; SSE-LABEL: @sitofp_8i16_8f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <4 x i16> [[TMP3]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @sitofp_8i16_8f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @src16 to <8 x i16>*), align 64
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 64
 ; AVX-NEXT:    [[TMP2:%.*]] = sitofp <8 x i16> [[TMP1]] to <8 x float>
-; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
+; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
-  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
-  %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
-  %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
-  %ld4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
-  %ld5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
-  %ld6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
-  %ld7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
+  %ld0 = load i16, ptr @src16, align 64
+  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
+  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
+  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
+  %ld4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
+  %ld5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 5), align 2
+  %ld6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6), align 4
+  %ld7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 7), align 2
   %cvt0 = sitofp i16 %ld0 to float
   %cvt1 = sitofp i16 %ld1 to float
   %cvt2 = sitofp i16 %ld2 to float
@@ -840,64 +840,64 @@ define void @sitofp_8i16_8f32() #0 {
   %cvt5 = sitofp i16 %ld5 to float
   %cvt6 = sitofp i16 %ld6 to float
   %cvt7 = sitofp i16 %ld7 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-  store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
-  store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-  store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
-  store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
+  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
+  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
+  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @sitofp_16i16_16f32() #0 {
 ; SSE-LABEL: @sitofp_16i16_16f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <4 x i16> [[TMP3]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8) to <4 x i16>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 8), align 16
 ; SSE-NEXT:    [[TMP6:%.*]] = sitofp <4 x i16> [[TMP5]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 32
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12) to <4 x i16>*), align 8
+; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 12), align 8
 ; SSE-NEXT:    [[TMP8:%.*]] = sitofp <4 x i16> [[TMP7]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @sitofp_16i16_16f32(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @src16 to <8 x i16>*), align 64
+; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 64
 ; AVX256-NEXT:    [[TMP2:%.*]] = sitofp <8 x i16> [[TMP1]] to <8 x float>
-; AVX256-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
-; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8) to <8 x i16>*), align 16
+; AVX256-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
+; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 8), align 16
 ; AVX256-NEXT:    [[TMP4:%.*]] = sitofp <8 x i16> [[TMP3]] to <8 x float>
-; AVX256-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 32
+; AVX256-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sitofp_16i16_16f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @src16 to <16 x i16>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <16 x i16> [[TMP1]] to <16 x float>
-; AVX512-NEXT:    store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 64
+; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX512-NEXT:    ret void
 ;
-  %ld0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0 ), align 64
-  %ld1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1 ), align 2
-  %ld2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2 ), align 4
-  %ld3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3 ), align 2
-  %ld4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4 ), align 8
-  %ld5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5 ), align 2
-  %ld6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6 ), align 4
-  %ld7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7 ), align 2
-  %ld8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8 ), align 16
-  %ld9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 9 ), align 2
-  %ld10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 10), align 4
-  %ld11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 11), align 2
-  %ld12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12), align 8
-  %ld13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 13), align 2
-  %ld14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 14), align 4
-  %ld15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 15), align 2
+  %ld0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 0 ), align 64
+  %ld1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1 ), align 2
+  %ld2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2 ), align 4
+  %ld3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3 ), align 2
+  %ld4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4 ), align 8
+  %ld5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 5 ), align 2
+  %ld6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6 ), align 4
+  %ld7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 7 ), align 2
+  %ld8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 8 ), align 16
+  %ld9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 9 ), align 2
+  %ld10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 10), align 4
+  %ld11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 11), align 2
+  %ld12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 12), align 8
+  %ld13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 13), align 2
+  %ld14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 14), align 4
+  %ld15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 15), align 2
   %cvt0  = sitofp i16 %ld0  to float
   %cvt1  = sitofp i16 %ld1  to float
   %cvt2  = sitofp i16 %ld2  to float
@@ -914,71 +914,71 @@ define void @sitofp_16i16_16f32() #0 {
   %cvt13 = sitofp i16 %ld13 to float
   %cvt14 = sitofp i16 %ld14 to float
   %cvt15 = sitofp i16 %ld15 to float
-  store float %cvt0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 64
-  store float %cvt1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
-  store float %cvt2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 8
-  store float %cvt3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
-  store float %cvt4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 16
-  store float %cvt5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
-  store float %cvt6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 8
-  store float %cvt7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
-  store float %cvt8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 32
-  store float %cvt9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
-  store float %cvt10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
-  store float %cvt11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-  store float %cvt12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
-  store float %cvt13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-  store float %cvt14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
-  store float %cvt15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+  store float %cvt0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 64
+  store float %cvt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
+  store float %cvt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 8
+  store float %cvt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
+  store float %cvt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 16
+  store float %cvt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
+  store float %cvt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 8
+  store float %cvt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
+  store float %cvt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 32
+  store float %cvt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
+  store float %cvt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 8
+  store float %cvt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+  store float %cvt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
+  store float %cvt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+  store float %cvt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 8
+  store float %cvt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @sitofp_4i8_4f32() #0 {
 ; CHECK-LABEL: @sitofp_4i8_4f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
 ; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x float>
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
-  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
-  %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
-  %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
+  %ld0 = load i8, ptr @src8, align 64
+  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
+  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
+  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
   %cvt0 = sitofp i8 %ld0 to float
   %cvt1 = sitofp i8 %ld1 to float
   %cvt2 = sitofp i8 %ld2 to float
   %cvt3 = sitofp i8 %ld3 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
+  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @sitofp_8i8_8f32() #0 {
 ; SSE-LABEL: @sitofp_8i8_8f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <4 x i8> [[TMP3]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @sitofp_8i8_8f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([64 x i8]* @src8 to <8 x i8>*), align 64
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @src8, align 64
 ; AVX-NEXT:    [[TMP2:%.*]] = sitofp <8 x i8> [[TMP1]] to <8 x float>
-; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
+; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
-  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
-  %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
-  %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
-  %ld4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4), align 4
-  %ld5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5), align 1
-  %ld6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6), align 2
-  %ld7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7), align 1
+  %ld0 = load i8, ptr @src8, align 64
+  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
+  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
+  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
+  %ld4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
+  %ld5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 5), align 1
+  %ld6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6), align 2
+  %ld7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 7), align 1
   %cvt0 = sitofp i8 %ld0 to float
   %cvt1 = sitofp i8 %ld1 to float
   %cvt2 = sitofp i8 %ld2 to float
@@ -987,64 +987,64 @@ define void @sitofp_8i8_8f32() #0 {
   %cvt5 = sitofp i8 %ld5 to float
   %cvt6 = sitofp i8 %ld6 to float
   %cvt7 = sitofp i8 %ld7 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-  store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
-  store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-  store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
-  store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
+  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
+  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
+  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @sitofp_16i8_16f32() #0 {
 ; SSE-LABEL: @sitofp_16i8_16f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = sitofp <4 x i8> [[TMP3]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 8) to <4 x i8>*), align 8
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 8), align 8
 ; SSE-NEXT:    [[TMP6:%.*]] = sitofp <4 x i8> [[TMP5]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 32
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 12) to <4 x i8>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP8:%.*]] = sitofp <4 x i8> [[TMP7]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @sitofp_16i8_16f32(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([64 x i8]* @src8 to <8 x i8>*), align 64
+; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @src8, align 64
 ; AVX256-NEXT:    [[TMP2:%.*]] = sitofp <8 x i8> [[TMP1]] to <8 x float>
-; AVX256-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
-; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 8) to <8 x i8>*), align 8
+; AVX256-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
+; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 8), align 8
 ; AVX256-NEXT:    [[TMP4:%.*]] = sitofp <8 x i8> [[TMP3]] to <8 x float>
-; AVX256-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 32
+; AVX256-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sitofp_16i8_16f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @src8 to <16 x i8>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = sitofp <16 x i8> [[TMP1]] to <16 x float>
-; AVX512-NEXT:    store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 64
+; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX512-NEXT:    ret void
 ;
-  %ld0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0 ), align 64
-  %ld1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1 ), align 1
-  %ld2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2 ), align 2
-  %ld3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3 ), align 1
-  %ld4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4 ), align 4
-  %ld5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5 ), align 1
-  %ld6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6 ), align 2
-  %ld7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7 ), align 1
-  %ld8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 8 ), align 8
-  %ld9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 9 ), align 1
-  %ld10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 10), align 2
-  %ld11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 11), align 1
-  %ld12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 12), align 4
-  %ld13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 13), align 1
-  %ld14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 14), align 2
-  %ld15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 15), align 1
+  %ld0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 0 ), align 64
+  %ld1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1 ), align 1
+  %ld2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2 ), align 2
+  %ld3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3 ), align 1
+  %ld4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4 ), align 4
+  %ld5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 5 ), align 1
+  %ld6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6 ), align 2
+  %ld7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 7 ), align 1
+  %ld8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 8 ), align 8
+  %ld9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 9 ), align 1
+  %ld10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 10), align 2
+  %ld11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 11), align 1
+  %ld12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 12), align 4
+  %ld13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 13), align 1
+  %ld14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 14), align 2
+  %ld15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 15), align 1
   %cvt0  = sitofp i8 %ld0  to float
   %cvt1  = sitofp i8 %ld1  to float
   %cvt2  = sitofp i8 %ld2  to float
@@ -1061,22 +1061,22 @@ define void @sitofp_16i8_16f32() #0 {
   %cvt13 = sitofp i8 %ld13 to float
   %cvt14 = sitofp i8 %ld14 to float
   %cvt15 = sitofp i8 %ld15 to float
-  store float %cvt0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 64
-  store float %cvt1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
-  store float %cvt2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 8
-  store float %cvt3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
-  store float %cvt4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 16
-  store float %cvt5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
-  store float %cvt6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 8
-  store float %cvt7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
-  store float %cvt8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 32
-  store float %cvt9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
-  store float %cvt10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
-  store float %cvt11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-  store float %cvt12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
-  store float %cvt13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-  store float %cvt14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
-  store float %cvt15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+  store float %cvt0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 64
+  store float %cvt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
+  store float %cvt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 8
+  store float %cvt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
+  store float %cvt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 16
+  store float %cvt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
+  store float %cvt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 8
+  store float %cvt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
+  store float %cvt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 32
+  store float %cvt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
+  store float %cvt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 8
+  store float %cvt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+  store float %cvt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
+  store float %cvt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+  store float %cvt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 8
+  store float %cvt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
   ret void
 }
 

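The hunks above are mechanical: under opaque pointers, the bitcast constant
expressions that retyped a global for a vector access are no longer needed,
and an index-zero constant getelementptr of the global typically folds to a
bare ptr operand. A minimal sketch of the rewrite, using a hypothetical
global @g rather than one of the test arrays:

  ; typed pointers: the array global must be bitcast to the vector type
  %v = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @g to <4 x i32>*), align 64
  ; opaque pointers: the bitcast (and any zero-index GEP) folds away
  %v = load <4 x i32>, ptr @g, align 64
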
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll b/llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll
index 5fdcfad3ab39b..17dc47ae9b5a3 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll
@@ -1,13 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 < %s | FileCheck %s
 
-define dso_local void @rftbsub(double* %a) local_unnamed_addr #0 {
+define dso_local void @rftbsub(ptr %a) local_unnamed_addr #0 {
 ; CHECK-LABEL: @rftbsub(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 2
 ; CHECK-NEXT:    [[SUB22:%.*]] = fsub double undef, undef
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX6]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAYIDX6]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
 ; CHECK-NEXT:    [[ADD16:%.*]] = fadd double [[TMP2]], undef
 ; CHECK-NEXT:    [[MUL18:%.*]] = fmul double undef, [[ADD16]]
@@ -15,23 +14,22 @@ define dso_local void @rftbsub(double* %a) local_unnamed_addr #0 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[ADD19]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[SUB22]], i32 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = fsub <2 x double> [[TMP1]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[ARRAYIDX6]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[ARRAYIDX6]], align 8
 ; CHECK-NEXT:    unreachable
 ;
 entry:
-  %arrayidx6 = getelementptr inbounds double, double* %a, i64 2
-  %0 = load double, double* %arrayidx6, align 8
+  %arrayidx6 = getelementptr inbounds double, ptr %a, i64 2
+  %0 = load double, ptr %arrayidx6, align 8
   %1 = or i64 2, 1
-  %arrayidx12 = getelementptr inbounds double, double* %a, i64 %1
-  %2 = load double, double* %arrayidx12, align 8
+  %arrayidx12 = getelementptr inbounds double, ptr %a, i64 %1
+  %2 = load double, ptr %arrayidx12, align 8
   %add16 = fadd double %2, undef
   %mul18 = fmul double undef, %add16
   %add19 = fadd double undef, %mul18
   %sub22 = fsub double undef, undef
   %sub25 = fsub double %0, %add19
-  store double %sub25, double* %arrayidx6, align 8
+  store double %sub25, ptr %arrayidx6, align 8
   %sub29 = fsub double %2, %sub22
-  store double %sub29, double* %arrayidx12, align 8
+  store double %sub29, ptr %arrayidx12, align 8
   unreachable
 }

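The slp-throttle.ll hunk above shows the other recurring shape of this
conversion: with typed pointers the vectorizer had to materialize a bitcast
instruction (TMP0 and TMP6 in the old CHECK lines) before each wide load or
store, and those instructions disappear outright once the memory operation
can take the opaque pointer directly. A minimal sketch, assuming a double
pointer %p that is not taken from the test:

  ; typed pointers: a cast instruction feeds the vector access
  %c = bitcast double* %p to <2 x double>*
  %v = load <2 x double>, <2 x double>* %c, align 8
  ; opaque pointers: the load uses %p directly and %c is deleted
  %v = load <2 x double>, ptr %p, align 8
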
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll
index e6dc304c3027f..3b0710f72c8d4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll
@@ -3,92 +3,88 @@
 
 %struct.S = type { [8 x i32], [8 x i32], [16 x i32] }
 
-define dso_local void @_Z4testP1S(%struct.S* %p) local_unnamed_addr {
+define dso_local void @_Z4testP1S(ptr %p) local_unnamed_addr {
 ; CHECK-LABEL: @_Z4testP1S(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[P:%.*]], i64 0, i32 1, i64 0
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 2, i64 15
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 0
-; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 2, i64 7
-; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 2, i64 6
-; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 2, i64 4
-; CHECK-NEXT:    [[ARRAYIDX27:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 2, i64 12
-; CHECK-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 2, i64 13
-; CHECK-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 2, i64 14
-; CHECK-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 2, i64 5
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX]] to <8 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32*> poison, i32* [[ARRAYIDX1]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32*> [[TMP2]], i32* [[ARRAYIDX6]], i32 1
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32*> [[TMP3]], i32* [[ARRAYIDX13]], i32 2
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32*> [[TMP4]], i32* [[ARRAYIDX20]], i32 3
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32*> [[TMP5]], i32* [[ARRAYIDX27]], i32 4
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x i32*> [[TMP6]], i32* [[ARRAYIDX34]], i32 5
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32*> [[TMP7]], i32* [[ARRAYIDX41]], i32 6
-; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x i32*> [[TMP8]], i32* [[ARRAYIDX48]], i32 7
-; CHECK-NEXT:    [[TMP10:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[TMP9]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[P:%.*]], i64 0, i32 1, i64 0
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[P]], i64 0, i32 2, i64 15
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[P]], i64 0, i32 2, i64 7
+; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[P]], i64 0, i32 2, i64 6
+; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[P]], i64 0, i32 2, i64 4
+; CHECK-NEXT:    [[ARRAYIDX27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[P]], i64 0, i32 2, i64 12
+; CHECK-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[P]], i64 0, i32 2, i64 13
+; CHECK-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[P]], i64 0, i32 2, i64 14
+; CHECK-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[P]], i64 0, i32 2, i64 5
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x ptr> poison, ptr [[ARRAYIDX1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x ptr> [[TMP2]], ptr [[ARRAYIDX6]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x ptr> [[TMP3]], ptr [[ARRAYIDX13]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x ptr> [[TMP4]], ptr [[ARRAYIDX20]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x ptr> [[TMP5]], ptr [[ARRAYIDX27]], i32 4
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x ptr> [[TMP6]], ptr [[ARRAYIDX34]], i32 5
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x ptr> [[TMP7]], ptr [[ARRAYIDX41]], i32 6
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x ptr> [[TMP8]], ptr [[ARRAYIDX48]], i32 7
+; CHECK-NEXT:    [[TMP10:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP9]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
 ; CHECK-NEXT:    [[TMP11:%.*]] = add nsw <8 x i32> [[TMP10]], [[TMP1]]
-; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i32* [[ARRAYIDX2]] to <8 x i32>*
-; CHECK-NEXT:    store <8 x i32> [[TMP11]], <8 x i32>* [[TMP12]], align 4
+; CHECK-NEXT:    store <8 x i32> [[TMP11]], ptr [[P]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %arrayidx = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 1, i64 0
-  %i = load i32, i32* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 2, i64 15
-  %i1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 1, i64 0
+  %i = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 2, i64 15
+  %i1 = load i32, ptr %arrayidx1, align 4
   %add = add nsw i32 %i1, %i
-  %arrayidx2 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0, i64 0
-  store i32 %add, i32* %arrayidx2, align 4
-  %arrayidx4 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 1, i64 1
-  %i2 = load i32, i32* %arrayidx4, align 4
-  %arrayidx6 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 2, i64 7
-  %i3 = load i32, i32* %arrayidx6, align 4
+  store i32 %add, ptr %p, align 4
+  %arrayidx4 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 1, i64 1
+  %i2 = load i32, ptr %arrayidx4, align 4
+  %arrayidx6 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 2, i64 7
+  %i3 = load i32, ptr %arrayidx6, align 4
   %add7 = add nsw i32 %i3, %i2
-  %arrayidx9 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0, i64 1
-  store i32 %add7, i32* %arrayidx9, align 4
-  %arrayidx11 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 1, i64 2
-  %i4 = load i32, i32* %arrayidx11, align 4
-  %arrayidx13 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 2, i64 6
-  %i5 = load i32, i32* %arrayidx13, align 4
+  %arrayidx9 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 0, i64 1
+  store i32 %add7, ptr %arrayidx9, align 4
+  %arrayidx11 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 1, i64 2
+  %i4 = load i32, ptr %arrayidx11, align 4
+  %arrayidx13 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 2, i64 6
+  %i5 = load i32, ptr %arrayidx13, align 4
   %add14 = add nsw i32 %i5, %i4
-  %arrayidx16 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0, i64 2
-  store i32 %add14, i32* %arrayidx16, align 4
-  %arrayidx18 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 1, i64 3
-  %i6 = load i32, i32* %arrayidx18, align 4
-  %arrayidx20 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 2, i64 4
-  %i7 = load i32, i32* %arrayidx20, align 4
+  %arrayidx16 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 0, i64 2
+  store i32 %add14, ptr %arrayidx16, align 4
+  %arrayidx18 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 1, i64 3
+  %i6 = load i32, ptr %arrayidx18, align 4
+  %arrayidx20 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 2, i64 4
+  %i7 = load i32, ptr %arrayidx20, align 4
   %add21 = add nsw i32 %i7, %i6
-  %arrayidx23 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0, i64 3
-  store i32 %add21, i32* %arrayidx23, align 4
-  %arrayidx25 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 1, i64 4
-  %i8 = load i32, i32* %arrayidx25, align 4
-  %arrayidx27 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 2, i64 12
-  %i9 = load i32, i32* %arrayidx27, align 4
+  %arrayidx23 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 0, i64 3
+  store i32 %add21, ptr %arrayidx23, align 4
+  %arrayidx25 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 1, i64 4
+  %i8 = load i32, ptr %arrayidx25, align 4
+  %arrayidx27 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 2, i64 12
+  %i9 = load i32, ptr %arrayidx27, align 4
   %add28 = add nsw i32 %i9, %i8
-  %arrayidx30 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0, i64 4
-  store i32 %add28, i32* %arrayidx30, align 4
-  %arrayidx32 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 1, i64 5
-  %i10 = load i32, i32* %arrayidx32, align 4
-  %arrayidx34 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 2, i64 13
-  %i11 = load i32, i32* %arrayidx34, align 4
+  %arrayidx30 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 0, i64 4
+  store i32 %add28, ptr %arrayidx30, align 4
+  %arrayidx32 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 1, i64 5
+  %i10 = load i32, ptr %arrayidx32, align 4
+  %arrayidx34 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 2, i64 13
+  %i11 = load i32, ptr %arrayidx34, align 4
   %add35 = add nsw i32 %i11, %i10
-  %arrayidx37 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0, i64 5
-  store i32 %add35, i32* %arrayidx37, align 4
-  %arrayidx39 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 1, i64 6
-  %i12 = load i32, i32* %arrayidx39, align 4
-  %arrayidx41 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 2, i64 14
-  %i13 = load i32, i32* %arrayidx41, align 4
+  %arrayidx37 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 0, i64 5
+  store i32 %add35, ptr %arrayidx37, align 4
+  %arrayidx39 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 1, i64 6
+  %i12 = load i32, ptr %arrayidx39, align 4
+  %arrayidx41 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 2, i64 14
+  %i13 = load i32, ptr %arrayidx41, align 4
   %add42 = add nsw i32 %i13, %i12
-  %arrayidx44 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0, i64 6
-  store i32 %add42, i32* %arrayidx44, align 4
-  %arrayidx46 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 1, i64 7
-  %i14 = load i32, i32* %arrayidx46, align 4
-  %arrayidx48 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 2, i64 5
-  %i15 = load i32, i32* %arrayidx48, align 4
+  %arrayidx44 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 0, i64 6
+  store i32 %add42, ptr %arrayidx44, align 4
+  %arrayidx46 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 1, i64 7
+  %i14 = load i32, ptr %arrayidx46, align 4
+  %arrayidx48 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 2, i64 5
+  %i15 = load i32, ptr %arrayidx48, align 4
   %add49 = add nsw i32 %i15, %i14
-  %arrayidx51 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0, i64 7
-  store i32 %add49, i32* %arrayidx51, align 4
+  %arrayidx51 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 0, i64 7
+  store i32 %add49, ptr %arrayidx51, align 4
   ret void
 }
 
@@ -98,84 +94,74 @@ entry:
 ; (3) these loads are vectorized as part of a tree that is seeded by stores
 ; and with VF=8.
 
-define dso_local void @test_unordered_splits(%struct.S* nocapture %p) local_unnamed_addr {
+define dso_local void @test_unordered_splits(ptr nocapture %p) local_unnamed_addr {
 ; CHECK-LABEL: @test_unordered_splits(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[P1:%.*]] = alloca [16 x i32], align 16
 ; CHECK-NEXT:    [[P2:%.*]] = alloca [16 x i32], align 16
-; CHECK-NEXT:    [[G10:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P1]], i32 0, i64 4
-; CHECK-NEXT:    [[G20:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P2]], i32 0, i64 12
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[P:%.*]], i64 0, i32 0, i64 0
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[G10]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[G20]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[G10:%.*]] = getelementptr inbounds [16 x i32], ptr [[P1]], i32 0, i64 4
+; CHECK-NEXT:    [[G20:%.*]] = getelementptr inbounds [16 x i32], ptr [[P2]], i32 0, i64 12
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[G10]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[G20]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 7, i32 5, i32 6, i32 4>
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[ARRAYIDX2]] to <8 x i32>*
-; CHECK-NEXT:    store <8 x i32> [[SHUFFLE]], <8 x i32>* [[TMP7]], align 4
+; CHECK-NEXT:    store <8 x i32> [[SHUFFLE]], ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
   %p1 = alloca [16 x i32], align 16
   %p2 = alloca [16 x i32], align 16
-  %g10 = getelementptr inbounds [16 x i32], [16 x i32]* %p1, i32 0, i64 4
-  %g11 = getelementptr inbounds [16 x i32], [16 x i32]* %p1, i32 0, i64 5
-  %g12 = getelementptr inbounds [16 x i32], [16 x i32]* %p1, i32 0, i64 6
-  %g13 = getelementptr inbounds [16 x i32], [16 x i32]* %p1, i32 0, i64 7
-  %g20 = getelementptr inbounds [16 x i32], [16 x i32]* %p2, i32 0, i64 12
-  %g21 = getelementptr inbounds [16 x i32], [16 x i32]* %p2, i32 0, i64 13
-  %g22 = getelementptr inbounds [16 x i32], [16 x i32]* %p2, i32 0, i64 14
-  %g23 = getelementptr inbounds [16 x i32], [16 x i32]* %p2, i32 0, i64 15
-  %i1 = load i32, i32* %g11, align 4
-  %arrayidx2 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0, i64 0
-  store i32 %i1, i32* %arrayidx2, align 4
-  %i3 = load i32, i32* %g10, align 4
-  %arrayidx9 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0, i64 1
-  store i32 %i3, i32* %arrayidx9, align 4
-  %i5 = load i32, i32* %g12, align 4
-  %arrayidx16 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0, i64 2
-  store i32 %i5, i32* %arrayidx16, align 4
-  %i7 = load i32, i32* %g13, align 4
-  %arrayidx23 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0, i64 3
-  store i32 %i7, i32* %arrayidx23, align 4
-  %i9 = load i32, i32* %g23, align 4
-  %arrayidx30 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0, i64 4
-  store i32 %i9, i32* %arrayidx30, align 4
-  %i11 = load i32, i32* %g21, align 4
-  %arrayidx37 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0, i64 5
-  store i32 %i11, i32* %arrayidx37, align 4
-  %i13 = load i32, i32* %g22, align 4
-  %arrayidx44 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0, i64 6
-  store i32 %i13, i32* %arrayidx44, align 4
-  %i15 = load i32, i32* %g20, align 4
-  %arrayidx51 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0, i64 7
-  store i32 %i15, i32* %arrayidx51, align 4
+  %g10 = getelementptr inbounds [16 x i32], ptr %p1, i32 0, i64 4
+  %g11 = getelementptr inbounds [16 x i32], ptr %p1, i32 0, i64 5
+  %g12 = getelementptr inbounds [16 x i32], ptr %p1, i32 0, i64 6
+  %g13 = getelementptr inbounds [16 x i32], ptr %p1, i32 0, i64 7
+  %g20 = getelementptr inbounds [16 x i32], ptr %p2, i32 0, i64 12
+  %g21 = getelementptr inbounds [16 x i32], ptr %p2, i32 0, i64 13
+  %g22 = getelementptr inbounds [16 x i32], ptr %p2, i32 0, i64 14
+  %g23 = getelementptr inbounds [16 x i32], ptr %p2, i32 0, i64 15
+  %i1 = load i32, ptr %g11, align 4
+  store i32 %i1, ptr %p, align 4
+  %i3 = load i32, ptr %g10, align 4
+  %arrayidx9 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 0, i64 1
+  store i32 %i3, ptr %arrayidx9, align 4
+  %i5 = load i32, ptr %g12, align 4
+  %arrayidx16 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 0, i64 2
+  store i32 %i5, ptr %arrayidx16, align 4
+  %i7 = load i32, ptr %g13, align 4
+  %arrayidx23 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 0, i64 3
+  store i32 %i7, ptr %arrayidx23, align 4
+  %i9 = load i32, ptr %g23, align 4
+  %arrayidx30 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 0, i64 4
+  store i32 %i9, ptr %arrayidx30, align 4
+  %i11 = load i32, ptr %g21, align 4
+  %arrayidx37 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 0, i64 5
+  store i32 %i11, ptr %arrayidx37, align 4
+  %i13 = load i32, ptr %g22, align 4
+  %arrayidx44 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 0, i64 6
+  store i32 %i13, ptr %arrayidx44, align 4
+  %i15 = load i32, ptr %g20, align 4
+  %arrayidx51 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 0, i64 7
+  store i32 %i15, ptr %arrayidx51, align 4
   ret void
 }
 
-define dso_local void @test_cost_splits(%struct.S* nocapture %p) local_unnamed_addr {
+define dso_local void @test_cost_splits(ptr nocapture %p) local_unnamed_addr {
 ; CHECK-LABEL: @test_cost_splits(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[P1:%.*]] = alloca [16 x i32], align 16
 ; CHECK-NEXT:    [[P2:%.*]] = alloca [16 x i32], align 16
 ; CHECK-NEXT:    [[P3:%.*]] = alloca [16 x i32], align 16
 ; CHECK-NEXT:    [[P4:%.*]] = alloca [16 x i32], align 16
-; CHECK-NEXT:    [[G10:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P1]], i32 0, i64 4
-; CHECK-NEXT:    [[G12:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P2]], i32 0, i64 6
-; CHECK-NEXT:    [[G20:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P3]], i32 0, i64 12
-; CHECK-NEXT:    [[G22:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P4]], i32 0, i64 14
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[P:%.*]], i64 0, i32 0, i64 0
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[G10]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[G12]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[TMP2]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[G20]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[G22]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    [[G10:%.*]] = getelementptr inbounds [16 x i32], ptr [[P1]], i32 0, i64 4
+; CHECK-NEXT:    [[G12:%.*]] = getelementptr inbounds [16 x i32], ptr [[P2]], i32 0, i64 6
+; CHECK-NEXT:    [[G20:%.*]] = getelementptr inbounds [16 x i32], ptr [[P3]], i32 0, i64 12
+; CHECK-NEXT:    [[G22:%.*]] = getelementptr inbounds [16 x i32], ptr [[P4]], i32 0, i64 14
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[G10]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr [[G12]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i32>, ptr [[G20]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x i32>, ptr [[G22]], align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -183,8 +169,7 @@ define dso_local void @test_cost_splits(%struct.S* nocapture %p) local_unnamed_a
 ; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <8 x i32> [[TMP12]], <8 x i32> [[TMP13]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
-; CHECK-NEXT:    [[TMP15:%.*]] = bitcast i32* [[ARRAYIDX2]] to <8 x i32>*
-; CHECK-NEXT:    store <8 x i32> [[TMP14]], <8 x i32>* [[TMP15]], align 4
+; CHECK-NEXT:    store <8 x i32> [[TMP14]], ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -192,37 +177,36 @@ entry:
   %p2 = alloca [16 x i32], align 16
   %p3 = alloca [16 x i32], align 16
   %p4 = alloca [16 x i32], align 16
-  %g10 = getelementptr inbounds [16 x i32], [16 x i32]* %p1, i32 0, i64 4
-  %g11 = getelementptr inbounds [16 x i32], [16 x i32]* %p1, i32 0, i64 5
-  %g12 = getelementptr inbounds [16 x i32], [16 x i32]* %p2, i32 0, i64 6
-  %g13 = getelementptr inbounds [16 x i32], [16 x i32]* %p2, i32 0, i64 7
-  %g20 = getelementptr inbounds [16 x i32], [16 x i32]* %p3, i32 0, i64 12
-  %g21 = getelementptr inbounds [16 x i32], [16 x i32]* %p3, i32 0, i64 13
-  %g22 = getelementptr inbounds [16 x i32], [16 x i32]* %p4, i32 0, i64 14
-  %g23 = getelementptr inbounds [16 x i32], [16 x i32]* %p4, i32 0, i64 15
-  %i1 = load i32, i32* %g10, align 4
-  %arrayidx2 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0, i64 0
-  store i32 %i1, i32* %arrayidx2, align 4
-  %i3 = load i32, i32* %g11, align 4
-  %arrayidx9 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0, i64 1
-  store i32 %i3, i32* %arrayidx9, align 4
-  %i5 = load i32, i32* %g12, align 4
-  %arrayidx16 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0, i64 2
-  store i32 %i5, i32* %arrayidx16, align 4
-  %i7 = load i32, i32* %g13, align 4
-  %arrayidx23 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0, i64 3
-  store i32 %i7, i32* %arrayidx23, align 4
-  %i9 = load i32, i32* %g20, align 4
-  %arrayidx30 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0, i64 4
-  store i32 %i9, i32* %arrayidx30, align 4
-  %i11 = load i32, i32* %g21, align 4
-  %arrayidx37 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0, i64 5
-  store i32 %i11, i32* %arrayidx37, align 4
-  %i13 = load i32, i32* %g22, align 4
-  %arrayidx44 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0, i64 6
-  store i32 %i13, i32* %arrayidx44, align 4
-  %i15 = load i32, i32* %g23, align 4
-  %arrayidx51 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0, i64 7
-  store i32 %i15, i32* %arrayidx51, align 4
+  %g10 = getelementptr inbounds [16 x i32], ptr %p1, i32 0, i64 4
+  %g11 = getelementptr inbounds [16 x i32], ptr %p1, i32 0, i64 5
+  %g12 = getelementptr inbounds [16 x i32], ptr %p2, i32 0, i64 6
+  %g13 = getelementptr inbounds [16 x i32], ptr %p2, i32 0, i64 7
+  %g20 = getelementptr inbounds [16 x i32], ptr %p3, i32 0, i64 12
+  %g21 = getelementptr inbounds [16 x i32], ptr %p3, i32 0, i64 13
+  %g22 = getelementptr inbounds [16 x i32], ptr %p4, i32 0, i64 14
+  %g23 = getelementptr inbounds [16 x i32], ptr %p4, i32 0, i64 15
+  %i1 = load i32, ptr %g10, align 4
+  store i32 %i1, ptr %p, align 4
+  %i3 = load i32, ptr %g11, align 4
+  %arrayidx9 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 0, i64 1
+  store i32 %i3, ptr %arrayidx9, align 4
+  %i5 = load i32, ptr %g12, align 4
+  %arrayidx16 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 0, i64 2
+  store i32 %i5, ptr %arrayidx16, align 4
+  %i7 = load i32, ptr %g13, align 4
+  %arrayidx23 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 0, i64 3
+  store i32 %i7, ptr %arrayidx23, align 4
+  %i9 = load i32, ptr %g20, align 4
+  %arrayidx30 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 0, i64 4
+  store i32 %i9, ptr %arrayidx30, align 4
+  %i11 = load i32, ptr %g21, align 4
+  %arrayidx37 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 0, i64 5
+  store i32 %i11, ptr %arrayidx37, align 4
+  %i13 = load i32, ptr %g22, align 4
+  %arrayidx44 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 0, i64 6
+  store i32 %i13, ptr %arrayidx44, align 4
+  %i15 = load i32, ptr %g23, align 4
+  %arrayidx51 = getelementptr inbounds %struct.S, ptr %p, i64 0, i32 0, i64 7
+  store i32 %i15, ptr %arrayidx51, align 4
   ret void
 }

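The overloaded intrinsic names change as well: with typed pointers the masked gather was mangled on the pointee type (v8p0i32), while with opaque pointers only the address space is left in the suffix (v8p0). The corresponding declarations would look roughly like this (reconstructed from the call sites in these tests; the diff itself only shows the calls):

; typed pointers: the pointee type i32 is part of the mangling
declare <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>, i32, <8 x i1>, <8 x i32>)

; opaque pointers: only address space 0 remains
declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i32>)

The i32 operand is the alignment; it is unchanged by the conversion.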
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2_unord_geps.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2_unord_geps.ll
index fbc39945006df..ba83ff096c9ac 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2_unord_geps.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2_unord_geps.ll
@@ -1,114 +1,111 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake-avx512 | FileCheck %s
 
-define void @test(i32* noalias %p, i32* noalias %addr, i32* noalias %s) {
+define void @test(ptr noalias %p, ptr noalias %addr, ptr noalias %s) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <8 x i32*> poison, i32* [[ADDR:%.*]], i32 0
-; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <8 x i32*> [[TMP0]], <8 x i32*> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, <8 x i32*> [[SHUFFLE1]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, <8 x i32*> [[SHUFFLE1]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[TMP2]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32*> poison, i32* [[P:%.*]], i32 0
-; CHECK-NEXT:    [[SHUFFLE2:%.*]] = shufflevector <8 x i32*> [[TMP4]], <8 x i32*> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, <8 x i32*> [[SHUFFLE2]], <8 x i32> [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[TMP5]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
-; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[TMP1]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, <8 x i32*> [[SHUFFLE2]], <8 x i32> [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[TMP8]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[ADDR:%.*]], i32 0
+; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE1]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE1]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x ptr> poison, ptr [[P:%.*]], i32 0
+; CHECK-NEXT:    [[SHUFFLE2:%.*]] = shufflevector <8 x ptr> [[TMP4]], <8 x ptr> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE2]], <8 x i32> [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP1]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE2]], <8 x i32> [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP8]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison)
 ; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <8 x i32> [[TMP9]], [[TMP6]]
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i32* [[ARRAYIDX2]] to <8 x i32>*
-; CHECK-NEXT:    store <8 x i32> [[TMP10]], <8 x i32>* [[TMP11]], align 4
+; CHECK-NEXT:    store <8 x i32> [[TMP10]], ptr [[S:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %idx1 = load i32, i32 *%addr, align 8
-  %arrayidx = getelementptr inbounds i32, i32* %p, i32 %idx1
-  %i = load i32, i32* %arrayidx, align 4
-  %gep2 = getelementptr inbounds i32, i32* %addr, i32 1
-  %idx2 = load i32, i32 *%gep2, align 8
-  %arrayidx1 = getelementptr inbounds i32, i32* %p, i32 %idx2
-  %i1 = load i32, i32* %arrayidx1, align 4
+  %idx1 = load i32, ptr %addr, align 8
+  %arrayidx = getelementptr inbounds i32, ptr %p, i32 %idx1
+  %i = load i32, ptr %arrayidx, align 4
+  %gep2 = getelementptr inbounds i32, ptr %addr, i32 1
+  %idx2 = load i32, ptr %gep2, align 8
+  %arrayidx1 = getelementptr inbounds i32, ptr %p, i32 %idx2
+  %i1 = load i32, ptr %arrayidx1, align 4
   %add = add nsw i32 %i1, %i
-  %arrayidx2 = getelementptr inbounds i32, i32* %s, i32 0
-  store i32 %add, i32* %arrayidx2, align 4
-  %gep3 = getelementptr inbounds i32, i32* %addr, i32 2
-  %idx3 = load i32, i32 *%gep3, align 8
-  %arrayidx4 = getelementptr inbounds i32, i32* %p, i32 %idx3
-  %i2 = load i32, i32* %arrayidx4, align 4
-  %gep4 = getelementptr inbounds i32, i32* %addr, i32 3
-  %idx4 = load i32, i32 *%gep4, align 8
-  %arrayidx6 = getelementptr inbounds i32, i32* %p, i32 %idx4
-  %i3 = load i32, i32* %arrayidx6, align 4
+  store i32 %add, ptr %s, align 4
+  %gep3 = getelementptr inbounds i32, ptr %addr, i32 2
+  %idx3 = load i32, ptr %gep3, align 8
+  %arrayidx4 = getelementptr inbounds i32, ptr %p, i32 %idx3
+  %i2 = load i32, ptr %arrayidx4, align 4
+  %gep4 = getelementptr inbounds i32, ptr %addr, i32 3
+  %idx4 = load i32, ptr %gep4, align 8
+  %arrayidx6 = getelementptr inbounds i32, ptr %p, i32 %idx4
+  %i3 = load i32, ptr %arrayidx6, align 4
   %add7 = add nsw i32 %i3, %i2
-  %arrayidx9 = getelementptr inbounds i32, i32* %s, i32 1
-  store i32 %add7, i32* %arrayidx9, align 4
-  %gep5 = getelementptr inbounds i32, i32* %addr, i32 4
-  %idx5 = load i32, i32 *%gep5, align 8
-  %arrayidx11 = getelementptr inbounds i32, i32* %p, i32 %idx5
-  %i4 = load i32, i32* %arrayidx11, align 4
-  %gep6 = getelementptr inbounds i32, i32* %addr, i32 5
-  %idx6 = load i32, i32 *%gep6, align 8
-  %arrayidx13 = getelementptr inbounds i32, i32* %p, i32 %idx6
-  %i5 = load i32, i32* %arrayidx13, align 4
+  %arrayidx9 = getelementptr inbounds i32, ptr %s, i32 1
+  store i32 %add7, ptr %arrayidx9, align 4
+  %gep5 = getelementptr inbounds i32, ptr %addr, i32 4
+  %idx5 = load i32, ptr %gep5, align 8
+  %arrayidx11 = getelementptr inbounds i32, ptr %p, i32 %idx5
+  %i4 = load i32, ptr %arrayidx11, align 4
+  %gep6 = getelementptr inbounds i32, ptr %addr, i32 5
+  %idx6 = load i32, ptr %gep6, align 8
+  %arrayidx13 = getelementptr inbounds i32, ptr %p, i32 %idx6
+  %i5 = load i32, ptr %arrayidx13, align 4
   %add14 = add nsw i32 %i5, %i4
-  %arrayidx16 = getelementptr inbounds i32, i32* %s, i32 2
-  store i32 %add14, i32* %arrayidx16, align 4
-  %gep7 = getelementptr inbounds i32, i32* %addr, i32 6
-  %idx7 = load i32, i32 *%gep7, align 8
-  %arrayidx18 = getelementptr inbounds i32, i32* %p, i32 %idx7
-  %i6 = load i32, i32* %arrayidx18, align 4
-  %gep8 = getelementptr inbounds i32, i32* %addr, i32 7
-  %idx8 = load i32, i32 *%gep8, align 8
-  %arrayidx20 = getelementptr inbounds i32, i32* %p, i32 %idx8
-  %i7 = load i32, i32* %arrayidx20, align 4
+  %arrayidx16 = getelementptr inbounds i32, ptr %s, i32 2
+  store i32 %add14, ptr %arrayidx16, align 4
+  %gep7 = getelementptr inbounds i32, ptr %addr, i32 6
+  %idx7 = load i32, ptr %gep7, align 8
+  %arrayidx18 = getelementptr inbounds i32, ptr %p, i32 %idx7
+  %i6 = load i32, ptr %arrayidx18, align 4
+  %gep8 = getelementptr inbounds i32, ptr %addr, i32 7
+  %idx8 = load i32, ptr %gep8, align 8
+  %arrayidx20 = getelementptr inbounds i32, ptr %p, i32 %idx8
+  %i7 = load i32, ptr %arrayidx20, align 4
   %add21 = add nsw i32 %i7, %i6
-  %arrayidx23 = getelementptr inbounds i32, i32* %s, i32 3
-  store i32 %add21, i32* %arrayidx23, align 4
-  %gep9 = getelementptr inbounds i32, i32* %addr, i32 8
-  %idx9 = load i32, i32 *%gep9, align 8
-  %arrayidx25 = getelementptr inbounds i32, i32* %p, i32 %idx9
-  %i8 = load i32, i32* %arrayidx25, align 4
-  %gep10 = getelementptr inbounds i32, i32* %addr, i32 9
-  %idx10 = load i32, i32 *%gep10, align 8
-  %arrayidx27 = getelementptr inbounds i32, i32* %p, i32 %idx10
-  %i9 = load i32, i32* %arrayidx27, align 4
+  %arrayidx23 = getelementptr inbounds i32, ptr %s, i32 3
+  store i32 %add21, ptr %arrayidx23, align 4
+  %gep9 = getelementptr inbounds i32, ptr %addr, i32 8
+  %idx9 = load i32, ptr %gep9, align 8
+  %arrayidx25 = getelementptr inbounds i32, ptr %p, i32 %idx9
+  %i8 = load i32, ptr %arrayidx25, align 4
+  %gep10 = getelementptr inbounds i32, ptr %addr, i32 9
+  %idx10 = load i32, ptr %gep10, align 8
+  %arrayidx27 = getelementptr inbounds i32, ptr %p, i32 %idx10
+  %i9 = load i32, ptr %arrayidx27, align 4
   %add28 = add nsw i32 %i9, %i8
-  %arrayidx30 = getelementptr inbounds i32, i32* %s, i32 4
-  store i32 %add28, i32* %arrayidx30, align 4
-  %gep11 = getelementptr inbounds i32, i32* %addr, i32 10
-  %idx11 = load i32, i32 *%gep11, align 8
-  %arrayidx32 = getelementptr inbounds i32, i32* %p, i32 %idx11
-  %i10 = load i32, i32* %arrayidx32, align 4
-  %gep12 = getelementptr inbounds i32, i32* %addr, i32 11
-  %idx12 = load i32, i32 *%gep12, align 8
-  %arrayidx34 = getelementptr inbounds i32, i32* %p, i32 %idx12
-  %i11 = load i32, i32* %arrayidx34, align 4
+  %arrayidx30 = getelementptr inbounds i32, ptr %s, i32 4
+  store i32 %add28, ptr %arrayidx30, align 4
+  %gep11 = getelementptr inbounds i32, ptr %addr, i32 10
+  %idx11 = load i32, ptr %gep11, align 8
+  %arrayidx32 = getelementptr inbounds i32, ptr %p, i32 %idx11
+  %i10 = load i32, ptr %arrayidx32, align 4
+  %gep12 = getelementptr inbounds i32, ptr %addr, i32 11
+  %idx12 = load i32, ptr %gep12, align 8
+  %arrayidx34 = getelementptr inbounds i32, ptr %p, i32 %idx12
+  %i11 = load i32, ptr %arrayidx34, align 4
   %add35 = add nsw i32 %i11, %i10
-  %arrayidx37 = getelementptr inbounds i32, i32* %s, i32 5
-  store i32 %add35, i32* %arrayidx37, align 4
-  %gep13 = getelementptr inbounds i32, i32* %addr, i32 12
-  %idx13 = load i32, i32 *%gep13, align 8
-  %arrayidx39 = getelementptr inbounds i32, i32* %p, i32 %idx13
-  %i12 = load i32, i32* %arrayidx39, align 4
-  %gep14 = getelementptr inbounds i32, i32* %addr, i32 13
-  %idx14 = load i32, i32 *%gep14, align 8
-  %arrayidx41 = getelementptr inbounds i32, i32* %p, i32 %idx14
-  %i13 = load i32, i32* %arrayidx41, align 4
+  %arrayidx37 = getelementptr inbounds i32, ptr %s, i32 5
+  store i32 %add35, ptr %arrayidx37, align 4
+  %gep13 = getelementptr inbounds i32, ptr %addr, i32 12
+  %idx13 = load i32, ptr %gep13, align 8
+  %arrayidx39 = getelementptr inbounds i32, ptr %p, i32 %idx13
+  %i12 = load i32, ptr %arrayidx39, align 4
+  %gep14 = getelementptr inbounds i32, ptr %addr, i32 13
+  %idx14 = load i32, ptr %gep14, align 8
+  %arrayidx41 = getelementptr inbounds i32, ptr %p, i32 %idx14
+  %i13 = load i32, ptr %arrayidx41, align 4
   %add42 = add nsw i32 %i13, %i12
-  %arrayidx44 = getelementptr inbounds i32, i32* %s, i32 6
-  store i32 %add42, i32* %arrayidx44, align 4
-  %gep15 = getelementptr inbounds i32, i32* %addr, i32 14
-  %idx15 = load i32, i32 *%gep15, align 8
-  %arrayidx46 = getelementptr inbounds i32, i32* %p, i32 %idx15
-  %i14 = load i32, i32* %arrayidx46, align 4
-  %gep16 = getelementptr inbounds i32, i32* %addr, i32 15
-  %idx16 = load i32, i32 *%gep16, align 8
-  %arrayidx48 = getelementptr inbounds i32, i32* %p, i32 %idx16
-  %i15 = load i32, i32* %arrayidx48, align 4
+  %arrayidx44 = getelementptr inbounds i32, ptr %s, i32 6
+  store i32 %add42, ptr %arrayidx44, align 4
+  %gep15 = getelementptr inbounds i32, ptr %addr, i32 14
+  %idx15 = load i32, ptr %gep15, align 8
+  %arrayidx46 = getelementptr inbounds i32, ptr %p, i32 %idx15
+  %i14 = load i32, ptr %arrayidx46, align 4
+  %gep16 = getelementptr inbounds i32, ptr %addr, i32 15
+  %idx16 = load i32, ptr %gep16, align 8
+  %arrayidx48 = getelementptr inbounds i32, ptr %p, i32 %idx16
+  %i15 = load i32, ptr %arrayidx48, align 4
   %add49 = add nsw i32 %i15, %i14
-  %arrayidx51 = getelementptr inbounds i32, i32* %s, i32 7
-  store i32 %add49, i32* %arrayidx51, align 4
+  %arrayidx51 = getelementptr inbounds i32, ptr %s, i32 7
+  store i32 %add49, ptr %arrayidx51, align 4
   ret void
 }

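In sqrt.ll below there is one more simplification: a constant getelementptr to element zero is just the address of the global, so with opaque pointers the operand is printed as the bare ptr @src64 / ptr @dst64, while non-zero indices keep their getelementptr form (now written in terms of ptr). A contrasting pair, taken directly from the first hunk:

; element 0: the zero-index constant GEP folds away
  %a0 = load double, ptr @src64, align 8
; element 1: the constant GEP remains, with ptr in place of [8 x double]*
  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8

The same folding appears in the struct tests above, where stores through a GEP to field 0, element 0 of %struct.S now store straight through ptr %p.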
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sqrt.ll b/llvm/test/Transforms/SLPVectorizer/X86/sqrt.ll
index 0ba6caf50e379..108d1b91e1855 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/sqrt.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/sqrt.ll
@@ -20,90 +20,90 @@ declare double @llvm.sqrt.f64(double)
 
 define void @sqrt_2f64() #0 {
 ; CHECK-LABEL: @sqrt_2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP1]])
-; CHECK-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 8
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+  %a0 = load double, ptr @src64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
   %sqrt0 = call double @llvm.sqrt.f64(double %a0)
   %sqrt1 = call double @llvm.sqrt.f64(double %a1)
-  store double %sqrt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %sqrt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %sqrt0, ptr @dst64, align 8
+  store double %sqrt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @sqrt_4f64() #0 {
 ; SSE-LABEL: @sqrt_4f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 8
 ; SSE-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP1]])
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 8
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP4:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP3]])
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @sqrt_4f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 8
 ; AVX-NEXT:    [[TMP2:%.*]] = call <4 x double> @llvm.sqrt.v4f64(<4 x double> [[TMP1]])
-; AVX-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 8
 ; AVX-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+  %a0 = load double, ptr @src64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 8
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 8
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 8
   %sqrt0 = call double @llvm.sqrt.f64(double %a0)
   %sqrt1 = call double @llvm.sqrt.f64(double %a1)
   %sqrt2 = call double @llvm.sqrt.f64(double %a2)
   %sqrt3 = call double @llvm.sqrt.f64(double %a3)
-  store double %sqrt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
-  store double %sqrt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %sqrt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
-  store double %sqrt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %sqrt0, ptr @dst64, align 8
+  store double %sqrt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %sqrt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 8
+  store double %sqrt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
   ret void
 }
 
 define void @sqrt_8f64() #0 {
 ; SSE-LABEL: @sqrt_8f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src64, align 4
 ; SSE-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP1]])
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 4
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP3]])
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <2 x double>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP5]])
-; SSE-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6) to <2 x double>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 4
 ; SSE-NEXT:    [[TMP8:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP7]])
-; SSE-NEXT:    store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @sqrt_8f64(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 4
+; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr @src64, align 4
 ; AVX256-NEXT:    [[TMP2:%.*]] = call <4 x double> @llvm.sqrt.v4f64(<4 x double> [[TMP1]])
-; AVX256-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 4
-; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 4
+; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 4
+; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 4
 ; AVX256-NEXT:    [[TMP4:%.*]] = call <4 x double> @llvm.sqrt.v4f64(<4 x double> [[TMP3]])
-; AVX256-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 4
+; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sqrt_8f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @src64, align 4
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <8 x double> @llvm.sqrt.v8f64(<8 x double> [[TMP1]])
-; AVX512-NEXT:    store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 4
+; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 4
-  %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 4
-  %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 4
-  %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 4
-  %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 4
-  %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 4
-  %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 4
-  %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 4
+  %a0 = load double, ptr @src64, align 4
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 1), align 4
+  %a2 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 2), align 4
+  %a3 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 3), align 4
+  %a4 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 4), align 4
+  %a5 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 5), align 4
+  %a6 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 6), align 4
+  %a7 = load double, ptr getelementptr inbounds ([8 x double], ptr @src64, i32 0, i64 7), align 4
   %sqrt0 = call double @llvm.sqrt.f64(double %a0)
   %sqrt1 = call double @llvm.sqrt.f64(double %a1)
   %sqrt2 = call double @llvm.sqrt.f64(double %a2)
@@ -112,63 +112,63 @@ define void @sqrt_8f64() #0 {
   %sqrt5 = call double @llvm.sqrt.f64(double %a5)
   %sqrt6 = call double @llvm.sqrt.f64(double %a6)
   %sqrt7 = call double @llvm.sqrt.f64(double %a7)
-  store double %sqrt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 4
-  store double %sqrt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 4
-  store double %sqrt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 4
-  store double %sqrt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 4
-  store double %sqrt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 4
-  store double %sqrt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 4
-  store double %sqrt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 4
-  store double %sqrt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 4
+  store double %sqrt0, ptr @dst64, align 4
+  store double %sqrt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 4
+  store double %sqrt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 4
+  store double %sqrt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 4
+  store double %sqrt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 4
+  store double %sqrt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 4
+  store double %sqrt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 4
+  store double %sqrt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 4
   ret void
 }
 
 define void @sqrt_4f32() #0 {
 ; CHECK-LABEL: @sqrt_4f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP1]])
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
 ; CHECK-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+  %a0 = load float, ptr @src32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
   %sqrt0 = call float @llvm.sqrt.f32(float %a0)
   %sqrt1 = call float @llvm.sqrt.f32(float %a1)
   %sqrt2 = call float @llvm.sqrt.f32(float %a2)
   %sqrt3 = call float @llvm.sqrt.f32(float %a3)
-  store float %sqrt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-  store float %sqrt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %sqrt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-  store float %sqrt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %sqrt0, ptr @dst32, align 4
+  store float %sqrt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %sqrt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+  store float %sqrt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @sqrt_8f32() #0 {
 ; SSE-LABEL: @sqrt_8f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; SSE-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP1]])
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP3]])
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @sqrt_8f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.sqrt.v8f32(<8 x float> [[TMP1]])
-; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 4
 ; AVX-NEXT:    ret void
 ;
-  %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-  %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-  %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-  %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-  %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-  %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-  %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-  %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+  %a0 = load float, ptr @src32, align 4
+  %a1 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 1), align 4
+  %a2 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 2), align 4
+  %a3 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 3), align 4
+  %a4 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
+  %a5 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 5), align 4
+  %a6 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 6), align 4
+  %a7 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 7), align 4
   %sqrt0 = call float @llvm.sqrt.f32(float %a0)
   %sqrt1 = call float @llvm.sqrt.f32(float %a1)
   %sqrt2 = call float @llvm.sqrt.f32(float %a2)
@@ -177,64 +177,64 @@ define void @sqrt_8f32() #0 {
   %sqrt5 = call float @llvm.sqrt.f32(float %a5)
   %sqrt6 = call float @llvm.sqrt.f32(float %a6)
   %sqrt7 = call float @llvm.sqrt.f32(float %a7)
-  store float %sqrt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
-  store float %sqrt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %sqrt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
-  store float %sqrt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-  store float %sqrt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
-  store float %sqrt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-  store float %sqrt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
-  store float %sqrt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  store float %sqrt0, ptr @dst32, align 4
+  store float %sqrt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %sqrt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 4
+  store float %sqrt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+  store float %sqrt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+  store float %sqrt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+  store float %sqrt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 4
+  store float %sqrt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @sqrt_16f32() #0 {
 ; SSE-LABEL: @sqrt_16f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr @src32, align 4
 ; SSE-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP1]])
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 4
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP3]])
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 4
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP5]])
-; SSE-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 4
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP8:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP7]])
-; SSE-NEXT:    store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @sqrt_16f32(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr @src32, align 4
 ; AVX256-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.sqrt.v8f32(<8 x float> [[TMP1]])
-; AVX256-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
-; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX256-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 4
+; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 8), align 4
 ; AVX256-NEXT:    [[TMP4:%.*]] = call <8 x float> @llvm.sqrt.v8f32(<8 x float> [[TMP3]])
-; AVX256-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX256-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 4
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sqrt_16f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @src32 to <16 x float>*), align 4
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, ptr @src32, align 4
 ; AVX512-NEXT:    [[TMP2:%.*]] = call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[TMP1]])
-; AVX512-NEXT:    store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 4
+; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 4
 ; AVX512-NEXT:    ret void
 ;
-  %a0  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64  0), align 4
-  %a1  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64  1), align 4
-  %a2  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64  2), align 4
-  %a3  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64  3), align 4
-  %a4  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64  4), align 4
-  %a5  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64  5), align 4
-  %a6  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64  6), align 4
-  %a7  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64  7), align 4
-  %a8  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64  8), align 4
-  %a9  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64  9), align 4
-  %a10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 10), align 4
-  %a11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 11), align 4
-  %a12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12), align 4
-  %a13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 13), align 4
-  %a14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 14), align 4
-  %a15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 15), align 4
+  %a0  = load float, ptr @src32, align 4
+  %a1  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64  1), align 4
+  %a2  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64  2), align 4
+  %a3  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64  3), align 4
+  %a4  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64  4), align 4
+  %a5  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64  5), align 4
+  %a6  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64  6), align 4
+  %a7  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64  7), align 4
+  %a8  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64  8), align 4
+  %a9  = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64  9), align 4
+  %a10 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 10), align 4
+  %a11 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 11), align 4
+  %a12 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 12), align 4
+  %a13 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 13), align 4
+  %a14 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 14), align 4
+  %a15 = load float, ptr getelementptr inbounds ([16 x float], ptr @src32, i32 0, i64 15), align 4
   %sqrt0  = call float @llvm.sqrt.f32(float %a0 )
   %sqrt1  = call float @llvm.sqrt.f32(float %a1 )
   %sqrt2  = call float @llvm.sqrt.f32(float %a2 )
@@ -251,22 +251,22 @@ define void @sqrt_16f32() #0 {
   %sqrt13 = call float @llvm.sqrt.f32(float %a13)
   %sqrt14 = call float @llvm.sqrt.f32(float %a14)
   %sqrt15 = call float @llvm.sqrt.f32(float %a15)
-  store float %sqrt0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  0), align 4
-  store float %sqrt1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  1), align 4
-  store float %sqrt2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  2), align 4
-  store float %sqrt3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  3), align 4
-  store float %sqrt4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  4), align 4
-  store float %sqrt5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  5), align 4
-  store float %sqrt6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  6), align 4
-  store float %sqrt7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  7), align 4
-  store float %sqrt8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  8), align 4
-  store float %sqrt9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64  9), align 4
-  store float %sqrt10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
-  store float %sqrt11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-  store float %sqrt12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
-  store float %sqrt13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-  store float %sqrt14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
-  store float %sqrt15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+  store float %sqrt0 , ptr @dst32, align 4
+  store float %sqrt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  1), align 4
+  store float %sqrt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  2), align 4
+  store float %sqrt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  3), align 4
+  store float %sqrt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  4), align 4
+  store float %sqrt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  5), align 4
+  store float %sqrt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  6), align 4
+  store float %sqrt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  7), align 4
+  store float %sqrt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  8), align 4
+  store float %sqrt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64  9), align 4
+  store float %sqrt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 4
+  store float %sqrt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+  store float %sqrt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 4
+  store float %sqrt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+  store float %sqrt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 4
+  store float %sqrt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
   ret void
 }
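
The sqrt hunks above are all the same mechanical rewrite. With typed pointers, a
vectorized access to the @src32/@dst32 arrays had to go through a bitcast
constant expression reinterpreting the array pointer as a vector pointer; with
opaque pointers the load or store type alone carries that information, so the
cast vanishes. A minimal before/after sketch, condensed from the sqrt_16f32
CHECK lines above (%v is a stand-in name):

  ; typed pointers: the global must first be bitcast to the access type
  %v = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4

  ; opaque pointers: the access type is stated only on the load itself
  %v = load <4 x float>, ptr @src32, align 4

Both describe the same 16-byte load from the start of @src32; only the pointer
bookkeeping differs.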
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll b/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll
index 218cef3f76658..19d1d70321327 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll
@@ -4,381 +4,366 @@
 declare i64 @may_inf_loop_ro() nounwind readonly
 
 ; Base case without allocas or stacksave
-define void @basecase(i8** %a, i8** %b, i8** %c) {
+define void @basecase(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @basecase(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8** [[A:%.*]] to <2 x i8*>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i8*>, <2 x i8*>* [[TMP1]], align 8
-; CHECK-NEXT:    store i8* null, i8** [[A]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x i8*> [[TMP2]], <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8** [[B:%.*]] to <2 x i8*>*
-; CHECK-NEXT:    store <2 x i8*> [[TMP3]], <2 x i8*>* [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    store ptr null, ptr [[A]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
 
-  %v1 = load i8*, i8** %a
-  store i8* zeroinitializer, i8** %a
-  %a2 = getelementptr i8*, i8** %a, i32 1
-  %v2 = load i8*, i8** %a2
+  %v1 = load ptr, ptr %a
+  store ptr zeroinitializer, ptr %a
+  %a2 = getelementptr ptr, ptr %a, i32 1
+  %v2 = load ptr, ptr %a2
 
-  %add1 = getelementptr i8, i8* %v1, i32 1
-  %add2 = getelementptr i8, i8* %v2, i32 1
+  %add1 = getelementptr i8, ptr %v1, i32 1
+  %add2 = getelementptr i8, ptr %v2, i32 1
 
-  store i8* %add1, i8** %b
-  %b2 = getelementptr i8*, i8** %b, i32 1
-  store i8* %add2, i8** %b2
+  store ptr %add1, ptr %b
+  %b2 = getelementptr ptr, ptr %b, i32 1
+  store ptr %add2, ptr %b2
   ret void
 }
 
 ; Using two allocas and a buildvector
-define void @allocas(i8** %a, i8** %b, i8** %c) {
+define void @allocas(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @allocas(
 ; CHECK-NEXT:    [[V1:%.*]] = alloca i8, align 1
 ; CHECK-NEXT:    [[V2:%.*]] = alloca i8, align 1
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i8*> poison, i8* [[V1]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i8*> [[TMP1]], i8* [[V2]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x i8*> [[TMP2]], <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i8*> [[TMP3]], i32 0
-; CHECK-NEXT:    store i8* [[TMP4]], i8** [[A:%.*]], align 8
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8** [[B:%.*]] to <2 x i8*>*
-; CHECK-NEXT:    store <2 x i8*> [[TMP3]], <2 x i8*>* [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 0
+; CHECK-NEXT:    store ptr [[TMP4]], ptr [[A:%.*]], align 8
+; CHECK-NEXT:    store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
 
   %v1 = alloca i8
-  %add1 = getelementptr i8, i8* %v1, i32 1
-  store i8* %add1, i8** %a
+  %add1 = getelementptr i8, ptr %v1, i32 1
+  store ptr %add1, ptr %a
   %v2 = alloca i8
 
-  %add2 = getelementptr i8, i8* %v2, i32 1
+  %add2 = getelementptr i8, ptr %v2, i32 1
 
-  store i8* %add1, i8** %b
-  %b2 = getelementptr i8*, i8** %b, i32 1
-  store i8* %add2, i8** %b2
+  store ptr %add1, ptr %b
+  %b2 = getelementptr ptr, ptr %b, i32 1
+  store ptr %add2, ptr %b2
   ret void
 }
 
 ; Allocas cannot be speculated above a potentially non-returning call
-define void @allocas_speculation(i8** %a, i8** %b, i8** %c) {
+define void @allocas_speculation(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @allocas_speculation(
 ; CHECK-NEXT:    [[V1:%.*]] = alloca i8, align 1
-; CHECK-NEXT:    [[ADD1:%.*]] = getelementptr i8, i8* [[V1]], i32 1
-; CHECK-NEXT:    store i8* [[ADD1]], i8** [[A:%.*]], align 8
+; CHECK-NEXT:    [[ADD1:%.*]] = getelementptr i8, ptr [[V1]], i32 1
+; CHECK-NEXT:    store ptr [[ADD1]], ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
 ; CHECK-NEXT:    [[V2:%.*]] = alloca i8, align 1
-; CHECK-NEXT:    [[ADD2:%.*]] = getelementptr i8, i8* [[V2]], i32 1
-; CHECK-NEXT:    store i8* [[ADD1]], i8** [[B:%.*]], align 8
-; CHECK-NEXT:    [[B2:%.*]] = getelementptr i8*, i8** [[B]], i32 1
-; CHECK-NEXT:    store i8* [[ADD2]], i8** [[B2]], align 8
+; CHECK-NEXT:    [[ADD2:%.*]] = getelementptr i8, ptr [[V2]], i32 1
+; CHECK-NEXT:    store ptr [[ADD1]], ptr [[B:%.*]], align 8
+; CHECK-NEXT:    [[B2:%.*]] = getelementptr ptr, ptr [[B]], i32 1
+; CHECK-NEXT:    store ptr [[ADD2]], ptr [[B2]], align 8
 ; CHECK-NEXT:    ret void
 ;
 
   %v1 = alloca i8
-  %add1 = getelementptr i8, i8* %v1, i32 1
-  store i8* %add1, i8** %a
+  %add1 = getelementptr i8, ptr %v1, i32 1
+  store ptr %add1, ptr %a
   call i64 @may_inf_loop_ro()
   %v2 = alloca i8
 
-  %add2 = getelementptr i8, i8* %v2, i32 1
+  %add2 = getelementptr i8, ptr %v2, i32 1
 
-  store i8* %add1, i8** %b
-  %b2 = getelementptr i8*, i8** %b, i32 1
-  store i8* %add2, i8** %b2
+  store ptr %add1, ptr %b
+  %b2 = getelementptr ptr, ptr %b, i32 1
+  store ptr %add2, ptr %b2
   ret void
 }
 
 ; We must be careful not to lift the inalloca alloc above the stacksave here.
 ; We used to miscompile this example before adding explicit dependency handling
 ; for stacksave.
-define void @stacksave(i8** %a, i8** %b, i8** %c) {
+define void @stacksave(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @stacksave(
 ; CHECK-NEXT:    [[V1:%.*]] = alloca i8, align 1
-; CHECK-NEXT:    [[ADD1:%.*]] = getelementptr i8, i8* [[V1]], i32 1
-; CHECK-NEXT:    store i8* [[ADD1]], i8** [[A:%.*]], align 8
-; CHECK-NEXT:    [[STACK:%.*]] = call i8* @llvm.stacksave()
+; CHECK-NEXT:    [[ADD1:%.*]] = getelementptr i8, ptr [[V1]], i32 1
+; CHECK-NEXT:    store ptr [[ADD1]], ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave()
 ; CHECK-NEXT:    [[V2:%.*]] = alloca inalloca i8, align 1
-; CHECK-NEXT:    call void @use(i8* inalloca(i8) [[V2]]) #[[ATTR4:[0-9]+]]
-; CHECK-NEXT:    call void @llvm.stackrestore(i8* [[STACK]])
-; CHECK-NEXT:    [[ADD2:%.*]] = getelementptr i8, i8* [[V2]], i32 1
-; CHECK-NEXT:    store i8* [[ADD1]], i8** [[B:%.*]], align 8
-; CHECK-NEXT:    [[B2:%.*]] = getelementptr i8*, i8** [[B]], i32 1
-; CHECK-NEXT:    store i8* [[ADD2]], i8** [[B2]], align 8
+; CHECK-NEXT:    call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR4:[0-9]+]]
+; CHECK-NEXT:    call void @llvm.stackrestore(ptr [[STACK]])
+; CHECK-NEXT:    [[ADD2:%.*]] = getelementptr i8, ptr [[V2]], i32 1
+; CHECK-NEXT:    store ptr [[ADD1]], ptr [[B:%.*]], align 8
+; CHECK-NEXT:    [[B2:%.*]] = getelementptr ptr, ptr [[B]], i32 1
+; CHECK-NEXT:    store ptr [[ADD2]], ptr [[B2]], align 8
 ; CHECK-NEXT:    ret void
 ;
 
   %v1 = alloca i8
-  %add1 = getelementptr i8, i8* %v1, i32 1
-  store i8* %add1, i8** %a
+  %add1 = getelementptr i8, ptr %v1, i32 1
+  store ptr %add1, ptr %a
 
-  %stack = call i8* @llvm.stacksave()
+  %stack = call ptr @llvm.stacksave()
   %v2 = alloca inalloca i8
-  call void @use(i8* inalloca(i8) %v2) readnone
-  call void @llvm.stackrestore(i8* %stack)
+  call void @use(ptr inalloca(i8) %v2) readnone
+  call void @llvm.stackrestore(ptr %stack)
 
-  %add2 = getelementptr i8, i8* %v2, i32 1
+  %add2 = getelementptr i8, ptr %v2, i32 1
 
-  store i8* %add1, i8** %b
-  %b2 = getelementptr i8*, i8** %b, i32 1
-  store i8* %add2, i8** %b2
+  store ptr %add1, ptr %b
+  %b2 = getelementptr ptr, ptr %b, i32 1
+  store ptr %add2, ptr %b2
   ret void
 }
 
-define void @stacksave2(i8** %a, i8** %b, i8** %c) {
+define void @stacksave2(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @stacksave2(
 ; CHECK-NEXT:    [[V1:%.*]] = alloca i8, align 1
-; CHECK-NEXT:    [[STACK:%.*]] = call i8* @llvm.stacksave()
+; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave()
 ; CHECK-NEXT:    [[V2:%.*]] = alloca inalloca i8, align 1
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i8*> poison, i8* [[V1]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i8*> [[TMP1]], i8* [[V2]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x i8*> [[TMP2]], <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i8*> [[TMP3]], i32 0
-; CHECK-NEXT:    store i8* [[TMP4]], i8** [[A:%.*]], align 8
-; CHECK-NEXT:    call void @use(i8* inalloca(i8) [[V2]]) #[[ATTR5:[0-9]+]]
-; CHECK-NEXT:    call void @llvm.stackrestore(i8* [[STACK]])
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8** [[B:%.*]] to <2 x i8*>*
-; CHECK-NEXT:    store <2 x i8*> [[TMP3]], <2 x i8*>* [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 0
+; CHECK-NEXT:    store ptr [[TMP4]], ptr [[A:%.*]], align 8
+; CHECK-NEXT:    call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR5:[0-9]+]]
+; CHECK-NEXT:    call void @llvm.stackrestore(ptr [[STACK]])
+; CHECK-NEXT:    store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
 
   %v1 = alloca i8
-  %add1 = getelementptr i8, i8* %v1, i32 1
+  %add1 = getelementptr i8, ptr %v1, i32 1
 
-  %stack = call i8* @llvm.stacksave()
-  store i8* %add1, i8** %a
+  %stack = call ptr @llvm.stacksave()
+  store ptr %add1, ptr %a
   %v2 = alloca inalloca i8
-  call void @use(i8* inalloca(i8) %v2) readonly
-  call void @llvm.stackrestore(i8* %stack)
+  call void @use(ptr inalloca(i8) %v2) readonly
+  call void @llvm.stackrestore(ptr %stack)
 
-  %add2 = getelementptr i8, i8* %v2, i32 1
+  %add2 = getelementptr i8, ptr %v2, i32 1
 
-  store i8* %add1, i8** %b
-  %b2 = getelementptr i8*, i8** %b, i32 1
-  store i8* %add2, i8** %b2
+  store ptr %add1, ptr %b
+  %b2 = getelementptr ptr, ptr %b, i32 1
+  store ptr %add2, ptr %b2
   ret void
 }
 
-define void @stacksave3(i8** %a, i8** %b, i8** %c) {
+define void @stacksave3(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @stacksave3(
-; CHECK-NEXT:    [[STACK:%.*]] = call i8* @llvm.stacksave()
+; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave()
 ; CHECK-NEXT:    [[V1:%.*]] = alloca i8, align 1
 ; CHECK-NEXT:    [[V2:%.*]] = alloca inalloca i8, align 1
-; CHECK-NEXT:    call void @use(i8* inalloca(i8) [[V2]]) #[[ATTR4]]
-; CHECK-NEXT:    call void @llvm.stackrestore(i8* [[STACK]])
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i8*> poison, i8* [[V1]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i8*> [[TMP1]], i8* [[V2]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x i8*> [[TMP2]], <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8** [[B:%.*]] to <2 x i8*>*
-; CHECK-NEXT:    store <2 x i8*> [[TMP3]], <2 x i8*>* [[TMP4]], align 8
+; CHECK-NEXT:    call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR4]]
+; CHECK-NEXT:    call void @llvm.stackrestore(ptr [[STACK]])
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
 
-  %stack = call i8* @llvm.stacksave()
+  %stack = call ptr @llvm.stacksave()
   %v1 = alloca i8
 
   %v2 = alloca inalloca i8
-  call void @use(i8* inalloca(i8) %v2) readnone
-  call void @llvm.stackrestore(i8* %stack)
+  call void @use(ptr inalloca(i8) %v2) readnone
+  call void @llvm.stackrestore(ptr %stack)
 
-  %add1 = getelementptr i8, i8* %v1, i32 1
-  %add2 = getelementptr i8, i8* %v2, i32 1
+  %add1 = getelementptr i8, ptr %v1, i32 1
+  %add2 = getelementptr i8, ptr %v2, i32 1
 
-  store i8* %add1, i8** %b
-  %b2 = getelementptr i8*, i8** %b, i32 1
-  store i8* %add2, i8** %b2
+  store ptr %add1, ptr %b
+  %b2 = getelementptr ptr, ptr %b, i32 1
+  store ptr %add2, ptr %b2
   ret void
 }
 
 ; Here we have an alloca which needs to stay under the stacksave, but is not
 ; directly part of the vectorization tree.  Instead, the stacksave is
 ; encountered during dependency scanning via the memory chain.
-define void @stacksave4(i8** %a, i8** %b, i8** %c) {
+define void @stacksave4(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @stacksave4(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8** [[A:%.*]] to <2 x i8*>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i8*>, <2 x i8*>* [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x i8*> [[TMP2]], <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT:    [[STACK:%.*]] = call i8* @llvm.stacksave()
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave()
 ; CHECK-NEXT:    [[X:%.*]] = alloca inalloca i8, align 1
-; CHECK-NEXT:    call void @use(i8* inalloca(i8) [[X]]) #[[ATTR4]]
-; CHECK-NEXT:    call void @llvm.stackrestore(i8* [[STACK]])
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8** [[B:%.*]] to <2 x i8*>*
-; CHECK-NEXT:    store <2 x i8*> [[TMP3]], <2 x i8*>* [[TMP4]], align 8
+; CHECK-NEXT:    call void @use(ptr inalloca(i8) [[X]]) #[[ATTR4]]
+; CHECK-NEXT:    call void @llvm.stackrestore(ptr [[STACK]])
+; CHECK-NEXT:    store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
 
-  %v1 = load i8*, i8** %a
-  %a2 = getelementptr i8*, i8** %a, i32 1
-  %v2 = load i8*, i8** %a2
+  %v1 = load ptr, ptr %a
+  %a2 = getelementptr ptr, ptr %a, i32 1
+  %v2 = load ptr, ptr %a2
 
-  %add1 = getelementptr i8, i8* %v1, i32 1
-  %add2 = getelementptr i8, i8* %v2, i32 1
+  %add1 = getelementptr i8, ptr %v1, i32 1
+  %add2 = getelementptr i8, ptr %v2, i32 1
 
-  %stack = call i8* @llvm.stacksave()
+  %stack = call ptr @llvm.stacksave()
   %x = alloca inalloca i8
-  call void @use(i8* inalloca(i8) %x) readnone
-  call void @llvm.stackrestore(i8* %stack)
+  call void @use(ptr inalloca(i8) %x) readnone
+  call void @llvm.stackrestore(ptr %stack)
 
-  store i8* %add1, i8** %b
-  %b2 = getelementptr i8*, i8** %b, i32 1
-  store i8* %add2, i8** %b2
+  store ptr %add1, ptr %b
+  %b2 = getelementptr ptr, ptr %b, i32 1
+  store ptr %add2, ptr %b2
   ret void
 }
 
-define void @stacksave5(i8** %a, i8** %b, i8** %c) {
+define void @stacksave5(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @stacksave5(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8** [[A:%.*]] to <2 x i8*>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i8*>, <2 x i8*>* [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x i8*> [[TMP2]], <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT:    [[STACK:%.*]] = call i8* @llvm.stacksave()
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave()
 ; CHECK-NEXT:    [[X:%.*]] = alloca inalloca i8, align 1
-; CHECK-NEXT:    call void @use(i8* inalloca(i8) [[X]]) #[[ATTR4]]
-; CHECK-NEXT:    call void @llvm.stackrestore(i8* [[STACK]])
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8** [[B:%.*]] to <2 x i8*>*
-; CHECK-NEXT:    store <2 x i8*> [[TMP3]], <2 x i8*>* [[TMP4]], align 8
+; CHECK-NEXT:    call void @use(ptr inalloca(i8) [[X]]) #[[ATTR4]]
+; CHECK-NEXT:    call void @llvm.stackrestore(ptr [[STACK]])
+; CHECK-NEXT:    store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
 
-  %v1 = load i8*, i8** %a
-  %a2 = getelementptr i8*, i8** %a, i32 1
-  %v2 = load i8*, i8** %a2
+  %v1 = load ptr, ptr %a
+  %a2 = getelementptr ptr, ptr %a, i32 1
+  %v2 = load ptr, ptr %a2
 
-  %add1 = getelementptr i8, i8* %v1, i32 1
-  %add2 = getelementptr i8, i8* %v2, i32 1
+  %add1 = getelementptr i8, ptr %v1, i32 1
+  %add2 = getelementptr i8, ptr %v2, i32 1
 
-  %stack = call i8* @llvm.stacksave()
+  %stack = call ptr @llvm.stacksave()
   %x = alloca inalloca i8
-  call void @use(i8* inalloca(i8) %x) readnone
-  call void @llvm.stackrestore(i8* %stack)
+  call void @use(ptr inalloca(i8) %x) readnone
+  call void @llvm.stackrestore(ptr %stack)
 
-  store i8* %add1, i8** %b
-  %b2 = getelementptr i8*, i8** %b, i32 1
-  store i8* %add2, i8** %b2
+  store ptr %add1, ptr %b
+  %b2 = getelementptr ptr, ptr %b, i32 1
+  store ptr %add2, ptr %b2
   ret void
 }
 
 ; Reordering the second alloca above the stackrestore while
 ; leaving the write to it below would introduce a write-after-free
 ; bug.
-define void @stackrestore1(i8** %a, i8** %b, i8** %c) {
+define void @stackrestore1(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @stackrestore1(
-; CHECK-NEXT:    [[STACK:%.*]] = call i8* @llvm.stacksave()
+; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave()
 ; CHECK-NEXT:    [[V1:%.*]] = alloca i8, align 1
-; CHECK-NEXT:    store i8 0, i8* [[V1]], align 1
-; CHECK-NEXT:    call void @llvm.stackrestore(i8* [[STACK]])
+; CHECK-NEXT:    store i8 0, ptr [[V1]], align 1
+; CHECK-NEXT:    call void @llvm.stackrestore(ptr [[STACK]])
 ; CHECK-NEXT:    [[V2:%.*]] = alloca i8, align 1
-; CHECK-NEXT:    store i8 0, i8* [[V2]], align 1
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i8*> poison, i8* [[V1]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i8*> [[TMP1]], i8* [[V2]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x i8*> [[TMP2]], <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8** [[B:%.*]] to <2 x i8*>*
-; CHECK-NEXT:    store <2 x i8*> [[TMP3]], <2 x i8*>* [[TMP4]], align 8
+; CHECK-NEXT:    store i8 0, ptr [[V2]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
 
-  %stack = call i8* @llvm.stacksave()
+  %stack = call ptr @llvm.stacksave()
   %v1 = alloca i8
-  store i8 0, i8* %v1
-  call void @llvm.stackrestore(i8* %stack)
+  store i8 0, ptr %v1
+  call void @llvm.stackrestore(ptr %stack)
   %v2 = alloca i8
-  store i8 0, i8* %v2
+  store i8 0, ptr %v2
 
-  %add1 = getelementptr i8, i8* %v1, i32 1
-  %add2 = getelementptr i8, i8* %v2, i32 1
+  %add1 = getelementptr i8, ptr %v1, i32 1
+  %add2 = getelementptr i8, ptr %v2, i32 1
 
-  store i8* %add1, i8** %b
-  %b2 = getelementptr i8*, i8** %b, i32 1
-  store i8* %add2, i8** %b2
+  store ptr %add1, ptr %b
+  %b2 = getelementptr ptr, ptr %b, i32 1
+  store ptr %add2, ptr %b2
   ret void
 }
 
-declare void @use(i8* inalloca(i8))
-declare i8* @llvm.stacksave()
-declare void @llvm.stackrestore(i8*)
+declare void @use(ptr inalloca(i8))
+declare ptr @llvm.stacksave()
+declare void @llvm.stackrestore(ptr)
 
 ; The next set is reduced from previous regressions.
 
-declare i8* @wibble(i8*)
-declare void @quux(i32* inalloca(i32))
+declare ptr @wibble(ptr)
+declare void @quux(ptr inalloca(i32))
 
 define void @ham() #1 {
 ; CHECK-LABEL: @ham(
 ; CHECK-NEXT:    [[VAR2:%.*]] = alloca i8, align 1
 ; CHECK-NEXT:    [[VAR3:%.*]] = alloca i8, align 1
-; CHECK-NEXT:    [[VAR12:%.*]] = alloca [12 x i8*], align 8
-; CHECK-NEXT:    [[VAR15:%.*]] = call i8* @wibble(i8* [[VAR2]])
-; CHECK-NEXT:    [[VAR16:%.*]] = call i8* @wibble(i8* [[VAR3]])
-; CHECK-NEXT:    [[VAR32:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8** [[VAR32]] to <4 x i8*>*
-; CHECK-NEXT:    [[VAR36:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 4
+; CHECK-NEXT:    [[VAR12:%.*]] = alloca [12 x ptr], align 8
+; CHECK-NEXT:    [[VAR15:%.*]] = call ptr @wibble(ptr [[VAR2]])
+; CHECK-NEXT:    [[VAR16:%.*]] = call ptr @wibble(ptr [[VAR3]])
+; CHECK-NEXT:    [[VAR36:%.*]] = getelementptr inbounds [12 x ptr], ptr [[VAR12]], i32 0, i32 4
 ; CHECK-NEXT:    [[VAR4:%.*]] = alloca i8, align 1
 ; CHECK-NEXT:    [[VAR5:%.*]] = alloca i8, align 1
-; CHECK-NEXT:    [[VAR17:%.*]] = call i8* @wibble(i8* [[VAR4]])
-; CHECK-NEXT:    [[VAR23:%.*]] = call i8* @llvm.stacksave()
+; CHECK-NEXT:    [[VAR17:%.*]] = call ptr @wibble(ptr [[VAR4]])
+; CHECK-NEXT:    [[VAR23:%.*]] = call ptr @llvm.stacksave()
 ; CHECK-NEXT:    [[VAR24:%.*]] = alloca inalloca i32, align 4
-; CHECK-NEXT:    call void @quux(i32* inalloca(i32) [[VAR24]])
-; CHECK-NEXT:    call void @llvm.stackrestore(i8* [[VAR23]])
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i8*> poison, i8* [[VAR4]], i32 0
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i8*> [[TMP2]], <4 x i8*> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    store <4 x i8*> [[SHUFFLE]], <4 x i8*>* [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i8*> [[TMP2]], i8* [[VAR5]], i32 1
-; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <4 x i8*> [[TMP3]], <4 x i8*> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8** [[VAR36]] to <4 x i8*>*
-; CHECK-NEXT:    store <4 x i8*> [[SHUFFLE1]], <4 x i8*>* [[TMP4]], align 8
+; CHECK-NEXT:    call void @quux(ptr inalloca(i32) [[VAR24]])
+; CHECK-NEXT:    call void @llvm.stackrestore(ptr [[VAR23]])
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x ptr> poison, ptr [[VAR4]], i32 0
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x ptr> [[TMP2]], <4 x ptr> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    store <4 x ptr> [[SHUFFLE]], ptr [[VAR12]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x ptr> [[TMP2]], ptr [[VAR5]], i32 1
+; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    store <4 x ptr> [[SHUFFLE1]], ptr [[VAR36]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %var2 = alloca i8
   %var3 = alloca i8
   %var4 = alloca i8
   %var5 = alloca i8
-  %var12 = alloca [12 x i8*]
-  %var15 = call i8* @wibble(i8* %var2)
-  %var16 = call i8* @wibble(i8* %var3)
-  %var17 = call i8* @wibble(i8* %var4)
-  %var23 = call i8* @llvm.stacksave()
+  %var12 = alloca [12 x ptr]
+  %var15 = call ptr @wibble(ptr %var2)
+  %var16 = call ptr @wibble(ptr %var3)
+  %var17 = call ptr @wibble(ptr %var4)
+  %var23 = call ptr @llvm.stacksave()
   %var24 = alloca inalloca i32
-  call void @quux(i32* inalloca(i32) %var24)
-  call void @llvm.stackrestore(i8* %var23)
-  %var32 = getelementptr inbounds [12 x i8*], [12 x i8*]* %var12, i32 0, i32 0
-  store i8* %var4, i8** %var32
-  %var33 = getelementptr inbounds [12 x i8*], [12 x i8*]* %var12, i32 0, i32 1
-  store i8* %var4, i8** %var33
-  %var34 = getelementptr inbounds [12 x i8*], [12 x i8*]* %var12, i32 0, i32 2
-  store i8* %var4, i8** %var34
-  %var35 = getelementptr inbounds [12 x i8*], [12 x i8*]* %var12, i32 0, i32 3
-  store i8* %var4, i8** %var35
-  %var36 = getelementptr inbounds [12 x i8*], [12 x i8*]* %var12, i32 0, i32 4
-  store i8* %var4, i8** %var36
-  %var37 = getelementptr inbounds [12 x i8*], [12 x i8*]* %var12, i32 0, i32 5
-  store i8* %var5, i8** %var37
-  %var38 = getelementptr inbounds [12 x i8*], [12 x i8*]* %var12, i32 0, i32 6
-  store i8* %var5, i8** %var38
-  %var39 = getelementptr inbounds [12 x i8*], [12 x i8*]* %var12, i32 0, i32 7
-  store i8* %var5, i8** %var39
+  call void @quux(ptr inalloca(i32) %var24)
+  call void @llvm.stackrestore(ptr %var23)
+  store ptr %var4, ptr %var12
+  %var33 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 1
+  store ptr %var4, ptr %var33
+  %var34 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 2
+  store ptr %var4, ptr %var34
+  %var35 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 3
+  store ptr %var4, ptr %var35
+  %var36 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 4
+  store ptr %var4, ptr %var36
+  %var37 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 5
+  store ptr %var5, ptr %var37
+  %var38 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 6
+  store ptr %var5, ptr %var38
+  %var39 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 7
+  store ptr %var5, ptr %var39
   ret void
 }
 
 define void @spam() #1 {
 ; CHECK-LABEL: @spam(
-; CHECK-NEXT:    [[VAR12:%.*]] = alloca [12 x i8*], align 8
-; CHECK-NEXT:    [[VAR36:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 4
+; CHECK-NEXT:    [[VAR12:%.*]] = alloca [12 x ptr], align 8
+; CHECK-NEXT:    [[VAR36:%.*]] = getelementptr inbounds [12 x ptr], ptr [[VAR12]], i32 0, i32 4
 ; CHECK-NEXT:    [[VAR4:%.*]] = alloca i8, align 1
 ; CHECK-NEXT:    [[VAR5:%.*]] = alloca i8, align 1
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i8*> poison, i8* [[VAR4]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i8*> [[TMP1]], i8* [[VAR5]], i32 1
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i8*> [[TMP2]], <4 x i8*> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8** [[VAR36]] to <4 x i8*>*
-; CHECK-NEXT:    store <4 x i8*> [[SHUFFLE]], <4 x i8*>* [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[VAR4]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[VAR5]], i32 1
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x ptr> [[TMP2]], <4 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    store <4 x ptr> [[SHUFFLE]], ptr [[VAR36]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %var4 = alloca i8
   %var5 = alloca i8
-  %var12 = alloca [12 x i8*]
-  %var36 = getelementptr inbounds [12 x i8*], [12 x i8*]* %var12, i32 0, i32 4
-  store i8* %var4, i8** %var36
-  %var37 = getelementptr inbounds [12 x i8*], [12 x i8*]* %var12, i32 0, i32 5
-  store i8* %var5, i8** %var37
-  %var38 = getelementptr inbounds [12 x i8*], [12 x i8*]* %var12, i32 0, i32 6
-  store i8* %var5, i8** %var38
-  %var39 = getelementptr inbounds [12 x i8*], [12 x i8*]* %var12, i32 0, i32 7
-  store i8* %var5, i8** %var39
+  %var12 = alloca [12 x ptr]
+  %var36 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 4
+  store ptr %var4, ptr %var36
+  %var37 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 5
+  store ptr %var5, ptr %var37
+  %var38 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 6
+  store ptr %var5, ptr %var38
+  %var39 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 7
+  store ptr %var5, ptr %var39
   ret void
 }
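
The stacksave tests above add two more spellings of the same rewrite. A load or
store of a vector of pointers no longer needs a throwaway bitcast of the i8**
base, and a getelementptr stepping over pointer-sized elements now names its
element type as plain ptr. Condensed from @basecase (the %t names stand in for
the autogenerated TMP values):

  ; typed pointers
  %t1 = bitcast i8** %a to <2 x i8*>*
  %t2 = load <2 x i8*>, <2 x i8*>* %t1, align 8
  %a2 = getelementptr i8*, i8** %a, i32 1

  ; opaque pointers
  %t2 = load <2 x ptr>, ptr %a, align 8
  %a2 = getelementptr ptr, ptr %a, i32 1

The GEP still advances by exactly one pointer-sized slot; only the type
spelling changes.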
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/store-jumbled.ll b/llvm/test/Transforms/SLPVectorizer/X86/store-jumbled.ll
index 6c1e7d17c545b..48243265bc3af 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/store-jumbled.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/store-jumbled.ll
@@ -3,49 +3,40 @@
 
 
 
-define i32 @jumbled-load(i32* noalias nocapture %in, i32* noalias nocapture %inn, i32* noalias nocapture %out) {
+define i32 @jumbled-load(ptr noalias nocapture %in, ptr noalias nocapture %inn, ptr noalias nocapture %out) {
 ; CHECK-LABEL: @jumbled-load(
-; CHECK-NEXT:    [[IN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 0
-; CHECK-NEXT:    [[INN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[INN:%.*]], i64 0
-; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[INN_ADDR]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[IN:%.*]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[INN:%.*]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul <4 x i32> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    store <4 x i32> [[SHUFFLE]], ptr [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret i32 undef
 ;
-  %in.addr = getelementptr inbounds i32, i32* %in, i64 0
-  %load.1 = load i32, i32* %in.addr, align 4
-  %gep.1 = getelementptr inbounds i32, i32* %in.addr, i64 1
-  %load.2 = load i32, i32* %gep.1, align 4
-  %gep.2 = getelementptr inbounds i32, i32* %in.addr, i64 2
-  %load.3 = load i32, i32* %gep.2, align 4
-  %gep.3 = getelementptr inbounds i32, i32* %in.addr, i64 3
-  %load.4 = load i32, i32* %gep.3, align 4
-  %inn.addr = getelementptr inbounds i32, i32* %inn, i64 0
-  %load.5 = load i32, i32* %inn.addr, align 4
-  %gep.4 = getelementptr inbounds i32, i32* %inn.addr, i64 1
-  %load.6 = load i32, i32* %gep.4, align 4
-  %gep.5 = getelementptr inbounds i32, i32* %inn.addr, i64 2
-  %load.7 = load i32, i32* %gep.5, align 4
-  %gep.6 = getelementptr inbounds i32, i32* %inn.addr, i64 3
-  %load.8 = load i32, i32* %gep.6, align 4
+  %load.1 = load i32, ptr %in, align 4
+  %gep.1 = getelementptr inbounds i32, ptr %in, i64 1
+  %load.2 = load i32, ptr %gep.1, align 4
+  %gep.2 = getelementptr inbounds i32, ptr %in, i64 2
+  %load.3 = load i32, ptr %gep.2, align 4
+  %gep.3 = getelementptr inbounds i32, ptr %in, i64 3
+  %load.4 = load i32, ptr %gep.3, align 4
+  %load.5 = load i32, ptr %inn, align 4
+  %gep.4 = getelementptr inbounds i32, ptr %inn, i64 1
+  %load.6 = load i32, ptr %gep.4, align 4
+  %gep.5 = getelementptr inbounds i32, ptr %inn, i64 2
+  %load.7 = load i32, ptr %gep.5, align 4
+  %gep.6 = getelementptr inbounds i32, ptr %inn, i64 3
+  %load.8 = load i32, ptr %gep.6, align 4
   %mul.1 = mul i32 %load.1, %load.5
   %mul.2 = mul i32 %load.2, %load.6
   %mul.3 = mul i32 %load.3, %load.7
   %mul.4 = mul i32 %load.4, %load.8
-  %gep.7 = getelementptr inbounds i32, i32* %out, i64 0
-  %gep.8 = getelementptr inbounds i32, i32* %out, i64 1
-  %gep.9 = getelementptr inbounds i32, i32* %out, i64 2
-  %gep.10 = getelementptr inbounds i32, i32* %out, i64 3
-  store i32 %mul.1, i32* %gep.9, align 4
-  store i32 %mul.2, i32* %gep.7, align 4
-  store i32 %mul.3, i32* %gep.10, align 4
-  store i32 %mul.4, i32* %gep.8, align 4
+  %gep.8 = getelementptr inbounds i32, ptr %out, i64 1
+  %gep.9 = getelementptr inbounds i32, ptr %out, i64 2
+  %gep.10 = getelementptr inbounds i32, ptr %out, i64 3
+  store i32 %mul.1, ptr %gep.9, align 4
+  store i32 %mul.2, ptr %out, align 4
+  store i32 %mul.3, ptr %gep.10, align 4
+  store i32 %mul.4, ptr %gep.8, align 4
 
   ret i32 undef
 }
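
The jumbled-load diff above also deletes instructions outright rather than
merely retyping them. A getelementptr with an all-zero index computes the same
address as its base; under typed pointers such GEPs sometimes survived to
adjust the pointee type, but with opaque pointers they carry no information at
all, so the conversion folds them away and points their users at the base
pointer. Condensed from the test:

  ; typed pointers
  %in.addr = getelementptr inbounds i32, i32* %in, i64 0
  %load.1 = load i32, i32* %in.addr, align 4

  ; opaque pointers: the zero-offset GEP disappears entirely
  %load.1 = load i32, ptr %in, align 4

This is why the hunk shrinks from 49 lines to 40 even though the set of memory
operations is unchanged. Note also that the surviving autogenerated values keep
their old numbers ([[TMP2]] and [[TMP4]] remain while [[TMP1]] and [[TMP3]],
the deleted bitcasts, leave gaps), which suggests the CHECK lines were
rewritten in place rather than regenerated.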

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stores-non-ordered.ll b/llvm/test/Transforms/SLPVectorizer/X86/stores-non-ordered.ll
index c4f0c2fd458d5..835b8d0006cbf 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/stores-non-ordered.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/stores-non-ordered.ll
@@ -1,24 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -S -mtriple=x86_64-unknown -passes=slp-vectorizer -slp-max-store-lookup=2 -slp-min-reg-size=64 -slp-threshold=-1000 | FileCheck %s
 
-define i32 @non-ordered-stores(i32* noalias nocapture %in, i32* noalias nocapture %inn, i32* noalias nocapture %out) {
+define i32 @non-ordered-stores(ptr noalias nocapture %in, ptr noalias nocapture %inn, ptr noalias nocapture %out) {
 ; CHECK-LABEL: @non-ordered-stores(
-; CHECK-NEXT:    [[IN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 0
-; CHECK-NEXT:    [[LOAD_1:%.*]] = load i32, i32* [[IN_ADDR]], align 4
-; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 1
-; CHECK-NEXT:    [[LOAD_2:%.*]] = load i32, i32* [[GEP_1]], align 4
-; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 2
-; CHECK-NEXT:    [[LOAD_3:%.*]] = load i32, i32* [[GEP_2]], align 4
-; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 3
-; CHECK-NEXT:    [[LOAD_4:%.*]] = load i32, i32* [[GEP_3]], align 4
-; CHECK-NEXT:    [[INN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[INN:%.*]], i64 0
-; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 1
-; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 2
-; CHECK-NEXT:    [[LOAD_5:%.*]] = load i32, i32* [[INN_ADDR]], align 4
-; CHECK-NEXT:    [[LOAD_7:%.*]] = load i32, i32* [[GEP_5]], align 4
-; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 3
-; CHECK-NEXT:    [[LOAD_6:%.*]] = load i32, i32* [[GEP_4]], align 4
-; CHECK-NEXT:    [[LOAD_8:%.*]] = load i32, i32* [[GEP_6]], align 4
+; CHECK-NEXT:    [[LOAD_1:%.*]] = load i32, ptr [[IN:%.*]], align 4
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 1
+; CHECK-NEXT:    [[LOAD_2:%.*]] = load i32, ptr [[GEP_1]], align 4
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 2
+; CHECK-NEXT:    [[LOAD_3:%.*]] = load i32, ptr [[GEP_2]], align 4
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 3
+; CHECK-NEXT:    [[LOAD_4:%.*]] = load i32, ptr [[GEP_3]], align 4
+; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i32, ptr [[INN:%.*]], i64 1
+; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i32, ptr [[INN]], i64 2
+; CHECK-NEXT:    [[LOAD_5:%.*]] = load i32, ptr [[INN]], align 4
+; CHECK-NEXT:    [[LOAD_7:%.*]] = load i32, ptr [[GEP_5]], align 4
+; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i32, ptr [[INN]], i64 3
+; CHECK-NEXT:    [[LOAD_6:%.*]] = load i32, ptr [[GEP_4]], align 4
+; CHECK-NEXT:    [[LOAD_8:%.*]] = load i32, ptr [[GEP_6]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[LOAD_1]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[LOAD_3]], i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[LOAD_5]], i32 0
@@ -31,36 +29,31 @@ define i32 @non-ordered-stores(i32* noalias nocapture %in, i32* noalias nocaptur
 ; CHECK-NEXT:    [[TMP10:%.*]] = mul <2 x i32> [[TMP7]], [[TMP9]]
 ; CHECK-NEXT:    br label [[BLOCK1:%.*]]
 ; CHECK:       block1:
-; CHECK-NEXT:    [[GEP_X:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 5
-; CHECK-NEXT:    [[LOAD_9:%.*]] = load i32, i32* [[GEP_X]], align 4
+; CHECK-NEXT:    [[GEP_X:%.*]] = getelementptr inbounds i32, ptr [[INN]], i64 5
+; CHECK-NEXT:    [[LOAD_9:%.*]] = load i32, ptr [[GEP_X]], align 4
 ; CHECK-NEXT:    br label [[BLOCK2:%.*]]
 ; CHECK:       block2:
-; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0
-; CHECK-NEXT:    [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2
-; CHECK-NEXT:    [[GEP_10:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 3
-; CHECK-NEXT:    store i32 [[LOAD_9]], i32* [[GEP_9]], align 4
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i32* [[GEP_10]] to <2 x i32>*
-; CHECK-NEXT:    store <2 x i32> [[TMP5]], <2 x i32>* [[TMP11]], align 4
-; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i32* [[GEP_7]] to <2 x i32>*
-; CHECK-NEXT:    store <2 x i32> [[TMP10]], <2 x i32>* [[TMP12]], align 4
+; CHECK-NEXT:    [[GEP_9:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 2
+; CHECK-NEXT:    [[GEP_10:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+; CHECK-NEXT:    store i32 [[LOAD_9]], ptr [[GEP_9]], align 4
+; CHECK-NEXT:    store <2 x i32> [[TMP5]], ptr [[GEP_10]], align 4
+; CHECK-NEXT:    store <2 x i32> [[TMP10]], ptr [[OUT]], align 4
 ; CHECK-NEXT:    ret i32 undef
 ;
-  %in.addr = getelementptr inbounds i32, i32* %in, i64 0
-  %load.1 = load i32, i32* %in.addr, align 4
-  %gep.1 = getelementptr inbounds i32, i32* %in.addr, i64 1
-  %load.2 = load i32, i32* %gep.1, align 4
-  %gep.2 = getelementptr inbounds i32, i32* %in.addr, i64 2
-  %load.3 = load i32, i32* %gep.2, align 4
-  %gep.3 = getelementptr inbounds i32, i32* %in.addr, i64 3
-  %load.4 = load i32, i32* %gep.3, align 4
-  %inn.addr = getelementptr inbounds i32, i32* %inn, i64 0
-  %load.5 = load i32, i32* %inn.addr, align 4
-  %gep.4 = getelementptr inbounds i32, i32* %inn.addr, i64 1
-  %load.6 = load i32, i32* %gep.4, align 4
-  %gep.5 = getelementptr inbounds i32, i32* %inn.addr, i64 2
-  %load.7 = load i32, i32* %gep.5, align 4
-  %gep.6 = getelementptr inbounds i32, i32* %inn.addr, i64 3
-  %load.8 = load i32, i32* %gep.6, align 4
+  %load.1 = load i32, ptr %in, align 4
+  %gep.1 = getelementptr inbounds i32, ptr %in, i64 1
+  %load.2 = load i32, ptr %gep.1, align 4
+  %gep.2 = getelementptr inbounds i32, ptr %in, i64 2
+  %load.3 = load i32, ptr %gep.2, align 4
+  %gep.3 = getelementptr inbounds i32, ptr %in, i64 3
+  %load.4 = load i32, ptr %gep.3, align 4
+  %load.5 = load i32, ptr %inn, align 4
+  %gep.4 = getelementptr inbounds i32, ptr %inn, i64 1
+  %load.6 = load i32, ptr %gep.4, align 4
+  %gep.5 = getelementptr inbounds i32, ptr %inn, i64 2
+  %load.7 = load i32, ptr %gep.5, align 4
+  %gep.6 = getelementptr inbounds i32, ptr %inn, i64 3
+  %load.8 = load i32, ptr %gep.6, align 4
   %mul.1 = mul i32 %load.1, %load.5
   %mul.2 = mul i32 %load.2, %load.6
   %mul.3 = mul i32 %load.3, %load.7
@@ -68,21 +61,20 @@ define i32 @non-ordered-stores(i32* noalias nocapture %in, i32* noalias nocaptur
   br label %block1
 
 block1:
-  %gep.x = getelementptr inbounds i32, i32* %inn.addr, i64 5
-  %load.9 = load i32, i32* %gep.x, align 4
+  %gep.x = getelementptr inbounds i32, ptr %inn, i64 5
+  %load.9 = load i32, ptr %gep.x, align 4
   br label %block2
 
 block2:
-  %gep.7 = getelementptr inbounds i32, i32* %out, i64 0
-  %gep.8 = getelementptr inbounds i32, i32* %out, i64 1
-  %gep.9 = getelementptr inbounds i32, i32* %out, i64 2
-  %gep.10 = getelementptr inbounds i32, i32* %out, i64 3
-  %gep.11 = getelementptr inbounds i32, i32* %out, i64 4
-  store i32 %mul.1, i32* %gep.10, align 4
-  store i32 %load.9, i32* %gep.9, align 4
-  store i32 %mul.2, i32* %gep.7, align 4
-  store i32 %mul.3, i32* %gep.11, align 4
-  store i32 %mul.4, i32* %gep.8, align 4
+  %gep.8 = getelementptr inbounds i32, ptr %out, i64 1
+  %gep.9 = getelementptr inbounds i32, ptr %out, i64 2
+  %gep.10 = getelementptr inbounds i32, ptr %out, i64 3
+  %gep.11 = getelementptr inbounds i32, ptr %out, i64 4
+  store i32 %mul.1, ptr %gep.10, align 4
+  store i32 %load.9, ptr %gep.9, align 4
+  store i32 %mul.2, ptr %out, align 4
+  store i32 %mul.3, ptr %gep.11, align 4
+  store i32 %mul.4, ptr %gep.8, align 4
 
   ret i32 undef
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll b/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll
index 0179890b797ed..378403ac30468 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes=slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx -mattr=+avx2 | FileCheck %s
 
-;void Distance(float *p1, int p2, unsigned long p3[], float p4[]) {
+;void Distance(ptr p1, int p2, unsigned long p3[], float p4[]) {
 ;  long a = p3[0] = 5;
 ;  p1 += p2;
 ;  p4[3] += p1[a];
@@ -13,271 +13,262 @@
 ;  p4[0] += p1[p3[0] & a];
 ;}
 
-define void @_Z8DistanceIlLi5EEvPfiPmS0_(float* %p1, i32 %p2, i64* %p3, float* %p4) {
+define void @_Z8DistanceIlLi5EEvPfiPmS0_(ptr %p1, i32 %p2, ptr %p3, ptr %p4) {
 ; CHECK-LABEL: @_Z8DistanceIlLi5EEvPfiPmS0_(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    store i64 5, i64* [[P3:%.*]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[P3:%.*]], align 8
 ; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[P2:%.*]] to i64
-; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[P1:%.*]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, float* [[ADD_PTR]], i64 5
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX1]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[P4:%.*]], i64 3
-; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[P1:%.*]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR]], i64 5
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[P4:%.*]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]]
-; CHECK-NEXT:    store float [[ADD]], float* [[ARRAYIDX2]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 8
+; CHECK-NEXT:    store float [[ADD]], ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr [[P3]], align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], <i64 5, i64 5, i64 5, i64 5>
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
-; CHECK-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* [[TMP5]], align 8
-; CHECK-NEXT:    [[ADD_PTR11:%.*]] = getelementptr inbounds float, float* [[ADD_PTR]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    store <4 x i64> [[TMP4]], ptr [[P3]], align 8
+; CHECK-NEXT:    [[ADD_PTR11:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR]], i64 [[IDX_EXT]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0
 ; CHECK-NEXT:    [[AND:%.*]] = and i64 [[TMP6]], 5
-; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[ADD_PTR11]], i64 [[AND]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load float, float* [[ARRAYIDX13]], align 4
-; CHECK-NEXT:    [[TMP8:%.*]] = load float, float* [[P4]], align 4
+; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR11]], i64 [[AND]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load float, ptr [[ARRAYIDX13]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[P4]], align 4
 ; CHECK-NEXT:    [[ADD15:%.*]] = fadd float [[TMP7]], [[TMP8]]
-; CHECK-NEXT:    store float [[ADD15]], float* [[P4]], align 4
+; CHECK-NEXT:    store float [[ADD15]], ptr [[P4]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  store i64 5, i64* %p3, align 8
+  store i64 5, ptr %p3, align 8
   %idx.ext = sext i32 %p2 to i64
-  %add.ptr = getelementptr inbounds float, float* %p1, i64 %idx.ext
-  %arrayidx1 = getelementptr inbounds float, float* %add.ptr, i64 5
-  %0 = load float, float* %arrayidx1, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %p4, i64 3
-  %1 = load float, float* %arrayidx2, align 4
+  %add.ptr = getelementptr inbounds float, ptr %p1, i64 %idx.ext
+  %arrayidx1 = getelementptr inbounds float, ptr %add.ptr, i64 5
+  %0 = load float, ptr %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %p4, i64 3
+  %1 = load float, ptr %arrayidx2, align 4
   %add = fadd float %0, %1
-  store float %add, float* %arrayidx2, align 4
-  %2 = load i64, i64* %p3, align 8
+  store float %add, ptr %arrayidx2, align 4
+  %2 = load i64, ptr %p3, align 8
   %shr = lshr i64 %2, 5
-  store i64 %shr, i64* %p3, align 8
-  %arrayidx4 = getelementptr inbounds i64, i64* %p3, i64 1
-  %3 = load i64, i64* %arrayidx4, align 8
+  store i64 %shr, ptr %p3, align 8
+  %arrayidx4 = getelementptr inbounds i64, ptr %p3, i64 1
+  %3 = load i64, ptr %arrayidx4, align 8
   %shr5 = lshr i64 %3, 5
-  store i64 %shr5, i64* %arrayidx4, align 8
-  %arrayidx6 = getelementptr inbounds i64, i64* %p3, i64 2
-  %4 = load i64, i64* %arrayidx6, align 8
+  store i64 %shr5, ptr %arrayidx4, align 8
+  %arrayidx6 = getelementptr inbounds i64, ptr %p3, i64 2
+  %4 = load i64, ptr %arrayidx6, align 8
   %shr7 = lshr i64 %4, 5
-  store i64 %shr7, i64* %arrayidx6, align 8
-  %arrayidx8 = getelementptr inbounds i64, i64* %p3, i64 3
-  %5 = load i64, i64* %arrayidx8, align 8
+  store i64 %shr7, ptr %arrayidx6, align 8
+  %arrayidx8 = getelementptr inbounds i64, ptr %p3, i64 3
+  %5 = load i64, ptr %arrayidx8, align 8
   %shr9 = lshr i64 %5, 5
-  store i64 %shr9, i64* %arrayidx8, align 8
-  %add.ptr11 = getelementptr inbounds float, float* %add.ptr, i64 %idx.ext
+  store i64 %shr9, ptr %arrayidx8, align 8
+  %add.ptr11 = getelementptr inbounds float, ptr %add.ptr, i64 %idx.ext
   %and = and i64 %shr, 5
-  %arrayidx13 = getelementptr inbounds float, float* %add.ptr11, i64 %and
-  %6 = load float, float* %arrayidx13, align 4
-  %7 = load float, float* %p4, align 4
+  %arrayidx13 = getelementptr inbounds float, ptr %add.ptr11, i64 %and
+  %6 = load float, ptr %arrayidx13, align 4
+  %7 = load float, ptr %p4, align 4
   %add15 = fadd float %6, %7
-  store float %add15, float* %p4, align 4
+  store float %add15, ptr %p4, align 4
   ret void
 }
 
-define void @store_reverse(i64* %p3) {
+define void @store_reverse(ptr %p3) {
 ; CHECK-LABEL: @store_reverse(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[P3:%.*]], i64 8
-; CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 4
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[ARRAYIDX1]] to <4 x i64>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 8
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[P3:%.*]], i64 8
+; CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i64, ptr [[P3]], i64 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr [[P3]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr [[ARRAYIDX1]], align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = shl <4 x i64> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[ARRAYIDX14]] to <4 x i64>*
-; CHECK-NEXT:    store <4 x i64> [[SHUFFLE]], <4 x i64>* [[TMP5]], align 8
+; CHECK-NEXT:    store <4 x i64> [[SHUFFLE]], ptr [[ARRAYIDX14]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %0 = load i64, i64* %p3, align 8
-  %arrayidx1 = getelementptr inbounds i64, i64* %p3, i64 8
-  %1 = load i64, i64* %arrayidx1, align 8
+  %0 = load i64, ptr %p3, align 8
+  %arrayidx1 = getelementptr inbounds i64, ptr %p3, i64 8
+  %1 = load i64, ptr %arrayidx1, align 8
   %shl = shl i64 %0, %1
-  %arrayidx2 = getelementptr inbounds i64, i64* %p3, i64 7
-  store i64 %shl, i64* %arrayidx2, align 8
-  %arrayidx3 = getelementptr inbounds i64, i64* %p3, i64 1
-  %2 = load i64, i64* %arrayidx3, align 8
-  %arrayidx4 = getelementptr inbounds i64, i64* %p3, i64 9
-  %3 = load i64, i64* %arrayidx4, align 8
+  %arrayidx2 = getelementptr inbounds i64, ptr %p3, i64 7
+  store i64 %shl, ptr %arrayidx2, align 8
+  %arrayidx3 = getelementptr inbounds i64, ptr %p3, i64 1
+  %2 = load i64, ptr %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds i64, ptr %p3, i64 9
+  %3 = load i64, ptr %arrayidx4, align 8
   %shl5 = shl i64 %2, %3
-  %arrayidx6 = getelementptr inbounds i64, i64* %p3, i64 6
-  store i64 %shl5, i64* %arrayidx6, align 8
-  %arrayidx7 = getelementptr inbounds i64, i64* %p3, i64 2
-  %4 = load i64, i64* %arrayidx7, align 8
-  %arrayidx8 = getelementptr inbounds i64, i64* %p3, i64 10
-  %5 = load i64, i64* %arrayidx8, align 8
+  %arrayidx6 = getelementptr inbounds i64, ptr %p3, i64 6
+  store i64 %shl5, ptr %arrayidx6, align 8
+  %arrayidx7 = getelementptr inbounds i64, ptr %p3, i64 2
+  %4 = load i64, ptr %arrayidx7, align 8
+  %arrayidx8 = getelementptr inbounds i64, ptr %p3, i64 10
+  %5 = load i64, ptr %arrayidx8, align 8
   %shl9 = shl i64 %4, %5
-  %arrayidx10 = getelementptr inbounds i64, i64* %p3, i64 5
-  store i64 %shl9, i64* %arrayidx10, align 8
-  %arrayidx11 = getelementptr inbounds i64, i64* %p3, i64 3
-  %6 = load i64, i64* %arrayidx11, align 8
-  %arrayidx12 = getelementptr inbounds i64, i64* %p3, i64 11
-  %7 = load i64, i64* %arrayidx12, align 8
+  %arrayidx10 = getelementptr inbounds i64, ptr %p3, i64 5
+  store i64 %shl9, ptr %arrayidx10, align 8
+  %arrayidx11 = getelementptr inbounds i64, ptr %p3, i64 3
+  %6 = load i64, ptr %arrayidx11, align 8
+  %arrayidx12 = getelementptr inbounds i64, ptr %p3, i64 11
+  %7 = load i64, ptr %arrayidx12, align 8
   %shl13 = shl i64 %6, %7
-  %arrayidx14 = getelementptr inbounds i64, i64* %p3, i64 4
-  store i64 %shl13, i64* %arrayidx14, align 8
+  %arrayidx14 = getelementptr inbounds i64, ptr %p3, i64 4
+  store i64 %shl13, ptr %arrayidx14, align 8
   ret void
 }
 
-define void @store15(float* %p1, i32 %p2, i64* %p3, float* %p4) {
+define void @store15(ptr %p1, i32 %p2, ptr %p3, ptr %p4) {
 ; CHECK-LABEL: @store15(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    store i64 5, i64* [[P3:%.*]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[P3:%.*]], align 8
 ; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[P2:%.*]] to i64
-; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[P1:%.*]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, float* [[ADD_PTR]], i64 5
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX1]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[P4:%.*]], i64 3
-; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[P1:%.*]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR]], i64 5
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[P4:%.*]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]]
-; CHECK-NEXT:    store float [[ADD]], float* [[ARRAYIDX2]], align 4
-; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 5
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 8
+; CHECK-NEXT:    store float [[ADD]], ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i64, ptr [[P3]], i64 5
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr [[P3]], align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], <i64 5, i64 5, i64 5, i64 5>
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
-; CHECK-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* [[TMP5]], align 8
+; CHECK-NEXT:    store <4 x i64> [[TMP4]], ptr [[P3]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  store i64 5, i64* %p3, align 8
+  store i64 5, ptr %p3, align 8
   %idx.ext = sext i32 %p2 to i64
-  %add.ptr = getelementptr inbounds float, float* %p1, i64 %idx.ext
-  %arrayidx1 = getelementptr inbounds float, float* %add.ptr, i64 5
-  %0 = load float, float* %arrayidx1, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %p4, i64 3
-  %1 = load float, float* %arrayidx2, align 4
+  %add.ptr = getelementptr inbounds float, ptr %p1, i64 %idx.ext
+  %arrayidx1 = getelementptr inbounds float, ptr %add.ptr, i64 5
+  %0 = load float, ptr %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %p4, i64 3
+  %1 = load float, ptr %arrayidx2, align 4
   %add = fadd float %0, %1
-  store float %add, float* %arrayidx2, align 4
-  %2 = load i64, i64* %p3, align 8
+  store float %add, ptr %arrayidx2, align 4
+  %2 = load i64, ptr %p3, align 8
   %shr = lshr i64 %2, 5
-  store i64 %shr, i64* %p3, align 8
-  %arrayidx4 = getelementptr inbounds i64, i64* %p3, i64 1
-  %3 = load i64, i64* %arrayidx4, align 8
+  store i64 %shr, ptr %p3, align 8
+  %arrayidx4 = getelementptr inbounds i64, ptr %p3, i64 1
+  %3 = load i64, ptr %arrayidx4, align 8
   %shr5 = lshr i64 %3, 5
-  store i64 %shr5, i64* %arrayidx4, align 8
-  %arrayidx6 = getelementptr inbounds i64, i64* %p3, i64 2
-  %4 = load i64, i64* %arrayidx6, align 8
+  store i64 %shr5, ptr %arrayidx4, align 8
+  %arrayidx6 = getelementptr inbounds i64, ptr %p3, i64 2
+  %4 = load i64, ptr %arrayidx6, align 8
   %shr7 = lshr i64 %4, 5
-  store i64 %shr7, i64* %arrayidx6, align 8
-  %arrayidx8 = getelementptr inbounds i64, i64* %p3, i64 3
-  %5 = load i64, i64* %arrayidx8, align 8
+  store i64 %shr7, ptr %arrayidx6, align 8
+  %arrayidx8 = getelementptr inbounds i64, ptr %p3, i64 3
+  %5 = load i64, ptr %arrayidx8, align 8
   %shr9 = lshr i64 %5, 5
-  %arrayidx9 = getelementptr inbounds i64, i64* %p3, i64 5
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 %shr9, i64* %arrayidx8, align 8
+  %arrayidx9 = getelementptr inbounds i64, ptr %p3, i64 5
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 %shr9, ptr %arrayidx8, align 8
   ret void
 }
 
-define void @store16(float* %p1, i32 %p2, i64* %p3, float* %p4) {
+define void @store16(ptr %p1, i32 %p2, ptr %p3, ptr %p4) {
 ; CHECK-LABEL: @store16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    store i64 5, i64* [[P3:%.*]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[P3:%.*]], align 8
 ; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[P2:%.*]] to i64
-; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[P1:%.*]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, float* [[ADD_PTR]], i64 5
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX1]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[P4:%.*]], i64 3
-; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[P1:%.*]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR]], i64 5
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[P4:%.*]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]]
-; CHECK-NEXT:    store float [[ADD]], float* [[ARRAYIDX2]], align 4
-; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 5
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    store i64 5, i64* [[ARRAYIDX9]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 8
+; CHECK-NEXT:    store float [[ADD]], ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i64, ptr [[P3]], i64 5
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr [[P3]], align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], <i64 5, i64 5, i64 5, i64 5>
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[P3]] to <4 x i64>*
-; CHECK-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* [[TMP5]], align 8
+; CHECK-NEXT:    store <4 x i64> [[TMP4]], ptr [[P3]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  store i64 5, i64* %p3, align 8
+  store i64 5, ptr %p3, align 8
   %idx.ext = sext i32 %p2 to i64
-  %add.ptr = getelementptr inbounds float, float* %p1, i64 %idx.ext
-  %arrayidx1 = getelementptr inbounds float, float* %add.ptr, i64 5
-  %0 = load float, float* %arrayidx1, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %p4, i64 3
-  %1 = load float, float* %arrayidx2, align 4
+  %add.ptr = getelementptr inbounds float, ptr %p1, i64 %idx.ext
+  %arrayidx1 = getelementptr inbounds float, ptr %add.ptr, i64 5
+  %0 = load float, ptr %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %p4, i64 3
+  %1 = load float, ptr %arrayidx2, align 4
   %add = fadd float %0, %1
-  store float %add, float* %arrayidx2, align 4
-  %2 = load i64, i64* %p3, align 8
+  store float %add, ptr %arrayidx2, align 4
+  %2 = load i64, ptr %p3, align 8
   %shr = lshr i64 %2, 5
-  store i64 %shr, i64* %p3, align 8
-  %arrayidx4 = getelementptr inbounds i64, i64* %p3, i64 1
-  %3 = load i64, i64* %arrayidx4, align 8
+  store i64 %shr, ptr %p3, align 8
+  %arrayidx4 = getelementptr inbounds i64, ptr %p3, i64 1
+  %3 = load i64, ptr %arrayidx4, align 8
   %shr5 = lshr i64 %3, 5
-  store i64 %shr5, i64* %arrayidx4, align 8
-  %arrayidx6 = getelementptr inbounds i64, i64* %p3, i64 2
-  %4 = load i64, i64* %arrayidx6, align 8
+  store i64 %shr5, ptr %arrayidx4, align 8
+  %arrayidx6 = getelementptr inbounds i64, ptr %p3, i64 2
+  %4 = load i64, ptr %arrayidx6, align 8
   %shr7 = lshr i64 %4, 5
-  store i64 %shr7, i64* %arrayidx6, align 8
-  %arrayidx8 = getelementptr inbounds i64, i64* %p3, i64 3
-  %5 = load i64, i64* %arrayidx8, align 8
+  store i64 %shr7, ptr %arrayidx6, align 8
+  %arrayidx8 = getelementptr inbounds i64, ptr %p3, i64 3
+  %5 = load i64, ptr %arrayidx8, align 8
   %shr9 = lshr i64 %5, 5
-  %arrayidx9 = getelementptr inbounds i64, i64* %p3, i64 5
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 5, i64* %arrayidx9, align 8
-  store i64 %shr9, i64* %arrayidx8, align 8
+  %arrayidx9 = getelementptr inbounds i64, ptr %p3, i64 5
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 5, ptr %arrayidx9, align 8
+  store i64 %shr9, ptr %arrayidx8, align 8
   ret void
 }
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/supernode.ll b/llvm/test/Transforms/SLPVectorizer/X86/supernode.ll
index 09bd54a3589e6..8fdf5eba844d6 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/supernode.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/supernode.ll
@@ -4,56 +4,46 @@
 ; Without supernode operand reordering, this does not get fully vectorized.
 ; S[0] = (A[0] + B[0]) + C[0]
 ; S[1] = (B[1] + C[1]) + A[1]
-define void @test_supernode_add(double* %Aarray, double* %Barray, double *%Carray, double *%Sarray) {
+define void @test_supernode_add(ptr %Aarray, ptr %Barray, ptr %Carray, ptr %Sarray) {
 ; ENABLED-LABEL: @test_supernode_add(
 ; ENABLED-NEXT:  entry:
-; ENABLED-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
-; ENABLED-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1
-; ENABLED-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
-; ENABLED-NEXT:    [[IDXC0:%.*]] = getelementptr inbounds double, double* [[CARRAY:%.*]], i64 0
-; ENABLED-NEXT:    [[IDXC1:%.*]] = getelementptr inbounds double, double* [[CARRAY]], i64 1
-; ENABLED-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
-; ENABLED-NEXT:    [[A0:%.*]] = load double, double* [[IDXA0]], align 8
-; ENABLED-NEXT:    [[A1:%.*]] = load double, double* [[IDXA1]], align 8
-; ENABLED-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDXB0]] to <2 x double>*
-; ENABLED-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; ENABLED-NEXT:    [[C0:%.*]] = load double, double* [[IDXC0]], align 8
-; ENABLED-NEXT:    [[C1:%.*]] = load double, double* [[IDXC1]], align 8
+; ENABLED-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, ptr [[AARRAY:%.*]], i64 1
+; ENABLED-NEXT:    [[IDXC1:%.*]] = getelementptr inbounds double, ptr [[CARRAY:%.*]], i64 1
+; ENABLED-NEXT:    [[A0:%.*]] = load double, ptr [[AARRAY]], align 8
+; ENABLED-NEXT:    [[A1:%.*]] = load double, ptr [[IDXA1]], align 8
+; ENABLED-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[BARRAY:%.*]], align 8
+; ENABLED-NEXT:    [[C0:%.*]] = load double, ptr [[CARRAY]], align 8
+; ENABLED-NEXT:    [[C1:%.*]] = load double, ptr [[IDXC1]], align 8
 ; ENABLED-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
 ; ENABLED-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[C1]], i32 1
 ; ENABLED-NEXT:    [[TMP4:%.*]] = fadd fast <2 x double> [[TMP1]], [[TMP3]]
 ; ENABLED-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
 ; ENABLED-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[A1]], i32 1
 ; ENABLED-NEXT:    [[TMP7:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP6]]
-; ENABLED-NEXT:    [[TMP8:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
-; ENABLED-NEXT:    store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8
+; ENABLED-NEXT:    store <2 x double> [[TMP7]], ptr [[SARRAY:%.*]], align 8
 ; ENABLED-NEXT:    ret void
 ;
 entry:
-  %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
-  %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
-  %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
-  %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
-  %idxC0 = getelementptr inbounds double, double* %Carray, i64 0
-  %idxC1 = getelementptr inbounds double, double* %Carray, i64 1
-  %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
-  %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1
+  %idxA1 = getelementptr inbounds double, ptr %Aarray, i64 1
+  %idxB1 = getelementptr inbounds double, ptr %Barray, i64 1
+  %idxC1 = getelementptr inbounds double, ptr %Carray, i64 1
+  %idxS1 = getelementptr inbounds double, ptr %Sarray, i64 1
 
-  %A0 = load double, double *%idxA0, align 8
-  %A1 = load double, double *%idxA1, align 8
+  %A0 = load double, ptr %Aarray, align 8
+  %A1 = load double, ptr %idxA1, align 8
 
-  %B0 = load double, double *%idxB0, align 8
-  %B1 = load double, double *%idxB1, align 8
+  %B0 = load double, ptr %Barray, align 8
+  %B1 = load double, ptr %idxB1, align 8
 
-  %C0 = load double, double *%idxC0, align 8
-  %C1 = load double, double *%idxC1, align 8
+  %C0 = load double, ptr %Carray, align 8
+  %C1 = load double, ptr %idxC1, align 8
 
   %addA0B0 = fadd fast double %A0, %B0
   %addB1C1 = fadd fast double %B1, %C1
   %add0 = fadd fast double %addA0B0, %C0
   %add1 = fadd fast double %addB1C1, %A1
-  store double %add0, double *%idxS0, align 8
-  store double %add1, double *%idxS1, align 8
+  store double %add0, ptr %Sarray, align 8
+  store double %add1, ptr %idxS1, align 8
   ret void
 }
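
For reference, a minimal standalone sketch of the fully vectorized shape the
ENABLED lines above check for; the function and value names are illustrative
and this is not part of the patch. Because fadd is commutative, SLP can swap
the per-lane operands so that the consecutive B[0]/B[1] loads become a single
vector load, while the commuted {A0, C1} and {C0, A1} operands are gathered:

define void @sketch_supernode_add(ptr %A, ptr %B, ptr %C, ptr %S) {
entry:
  %idxA1 = getelementptr inbounds double, ptr %A, i64 1
  %idxC1 = getelementptr inbounds double, ptr %C, i64 1
  %a0 = load double, ptr %A, align 8
  %a1 = load double, ptr %idxA1, align 8
  %b = load <2 x double>, ptr %B, align 8                   ; B[0], B[1] in one load
  %c0 = load double, ptr %C, align 8
  %c1 = load double, ptr %idxC1, align 8
  %g0 = insertelement <2 x double> poison, double %a0, i32 0
  %g1 = insertelement <2 x double> %g0, double %c1, i32 1   ; gather {A0, C1}
  %t0 = fadd fast <2 x double> %b, %g1
  %h0 = insertelement <2 x double> poison, double %c0, i32 0
  %h1 = insertelement <2 x double> %h0, double %a1, i32 1   ; gather {C0, A1}
  %t1 = fadd fast <2 x double> %t0, %h1
  store <2 x double> %t1, ptr %S, align 8
  ret void
}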
 
@@ -61,56 +51,46 @@ entry:
 ; Without supernode operand reordering, this does not get fully vectorized.
 ; S[0] = (A[0] - B[0]) + C[0]
 ; S[1] = (C[1] - B[1]) + A[1]
-define void @test_supernode_addsub(double* %Aarray, double* %Barray, double *%Carray, double *%Sarray) {
+define void @test_supernode_addsub(ptr %Aarray, ptr %Barray, ptr %Carray, ptr %Sarray) {
 ; ENABLED-LABEL: @test_supernode_addsub(
 ; ENABLED-NEXT:  entry:
-; ENABLED-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
-; ENABLED-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1
-; ENABLED-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
-; ENABLED-NEXT:    [[IDXC0:%.*]] = getelementptr inbounds double, double* [[CARRAY:%.*]], i64 0
-; ENABLED-NEXT:    [[IDXC1:%.*]] = getelementptr inbounds double, double* [[CARRAY]], i64 1
-; ENABLED-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
-; ENABLED-NEXT:    [[A0:%.*]] = load double, double* [[IDXA0]], align 8
-; ENABLED-NEXT:    [[A1:%.*]] = load double, double* [[IDXA1]], align 8
-; ENABLED-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDXB0]] to <2 x double>*
-; ENABLED-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; ENABLED-NEXT:    [[C0:%.*]] = load double, double* [[IDXC0]], align 8
-; ENABLED-NEXT:    [[C1:%.*]] = load double, double* [[IDXC1]], align 8
+; ENABLED-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, ptr [[AARRAY:%.*]], i64 1
+; ENABLED-NEXT:    [[IDXC1:%.*]] = getelementptr inbounds double, ptr [[CARRAY:%.*]], i64 1
+; ENABLED-NEXT:    [[A0:%.*]] = load double, ptr [[AARRAY]], align 8
+; ENABLED-NEXT:    [[A1:%.*]] = load double, ptr [[IDXA1]], align 8
+; ENABLED-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[BARRAY:%.*]], align 8
+; ENABLED-NEXT:    [[C0:%.*]] = load double, ptr [[CARRAY]], align 8
+; ENABLED-NEXT:    [[C1:%.*]] = load double, ptr [[IDXC1]], align 8
 ; ENABLED-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
 ; ENABLED-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[C1]], i32 1
 ; ENABLED-NEXT:    [[TMP4:%.*]] = fsub fast <2 x double> [[TMP3]], [[TMP1]]
 ; ENABLED-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
 ; ENABLED-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[A1]], i32 1
 ; ENABLED-NEXT:    [[TMP7:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP6]]
-; ENABLED-NEXT:    [[TMP8:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
-; ENABLED-NEXT:    store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8
+; ENABLED-NEXT:    store <2 x double> [[TMP7]], ptr [[SARRAY:%.*]], align 8
 ; ENABLED-NEXT:    ret void
 ;
 entry:
-  %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
-  %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
-  %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
-  %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
-  %idxC0 = getelementptr inbounds double, double* %Carray, i64 0
-  %idxC1 = getelementptr inbounds double, double* %Carray, i64 1
-  %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
-  %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1
+  %idxA1 = getelementptr inbounds double, ptr %Aarray, i64 1
+  %idxB1 = getelementptr inbounds double, ptr %Barray, i64 1
+  %idxC1 = getelementptr inbounds double, ptr %Carray, i64 1
+  %idxS1 = getelementptr inbounds double, ptr %Sarray, i64 1
 
-  %A0 = load double, double *%idxA0, align 8
-  %A1 = load double, double *%idxA1, align 8
+  %A0 = load double, ptr %Aarray, align 8
+  %A1 = load double, ptr %idxA1, align 8
 
-  %B0 = load double, double *%idxB0, align 8
-  %B1 = load double, double *%idxB1, align 8
+  %B0 = load double, ptr %Barray, align 8
+  %B1 = load double, ptr %idxB1, align 8
 
-  %C0 = load double, double *%idxC0, align 8
-  %C1 = load double, double *%idxC1, align 8
+  %C0 = load double, ptr %Carray, align 8
+  %C1 = load double, ptr %idxC1, align 8
 
   %subA0B0 = fsub fast double %A0, %B0
   %subC1B1 = fsub fast double %C1, %B1
   %add0 = fadd fast double %subA0B0, %C0
   %add1 = fadd fast double %subC1B1, %A1
-  store double %add0, double *%idxS0, align 8
-  store double %add1, double *%idxS1, align 8
+  store double %add0, ptr %Sarray, align 8
+  store double %add1, ptr %idxS1, align 8
   ret void
 }
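
The fsub variant above follows the same pattern even though fsub is not
commutative: both lanes subtract an element of B, so the gathered {A0, C1}
vector can serve as the uniform left-hand side. A minimal sketch
(illustrative names, not part of the patch):

define <2 x double> @sketch_supernode_fsub(double %a0, double %c1, <2 x double> %b) {
  %g0 = insertelement <2 x double> poison, double %a0, i32 0
  %g1 = insertelement <2 x double> %g0, double %c1, i32 1
  %t = fsub fast <2 x double> %g1, %b    ; lane 0: A0 - B0, lane 1: C1 - B1
  ret <2 x double> %t
}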
 
@@ -119,56 +99,48 @@ entry:
 ;
 ; S[0] = (A[0] - B[0]) - C[0]
 ; S[1] = (B[1] + C[1]) + A[1]
-define void @test_supernode_addsub_alt(double* %Aarray, double* %Barray, double *%Carray, double *%Sarray) {
+define void @test_supernode_addsub_alt(ptr %Aarray, ptr %Barray, ptr %Carray, ptr %Sarray) {
 ; ENABLED-LABEL: @test_supernode_addsub_alt(
 ; ENABLED-NEXT:  entry:
-; ENABLED-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
-; ENABLED-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1
-; ENABLED-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
-; ENABLED-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1
-; ENABLED-NEXT:    [[IDXC0:%.*]] = getelementptr inbounds double, double* [[CARRAY:%.*]], i64 0
-; ENABLED-NEXT:    [[IDXC1:%.*]] = getelementptr inbounds double, double* [[CARRAY]], i64 1
-; ENABLED-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
-; ENABLED-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1
-; ENABLED-NEXT:    [[A0:%.*]] = load double, double* [[IDXA0]], align 8
-; ENABLED-NEXT:    [[A1:%.*]] = load double, double* [[IDXA1]], align 8
-; ENABLED-NEXT:    [[B0:%.*]] = load double, double* [[IDXB0]], align 8
-; ENABLED-NEXT:    [[B1:%.*]] = load double, double* [[IDXB1]], align 8
-; ENABLED-NEXT:    [[C0:%.*]] = load double, double* [[IDXC0]], align 8
-; ENABLED-NEXT:    [[C1:%.*]] = load double, double* [[IDXC1]], align 8
+; ENABLED-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, ptr [[AARRAY:%.*]], i64 1
+; ENABLED-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, ptr [[BARRAY:%.*]], i64 1
+; ENABLED-NEXT:    [[IDXC1:%.*]] = getelementptr inbounds double, ptr [[CARRAY:%.*]], i64 1
+; ENABLED-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, ptr [[SARRAY:%.*]], i64 1
+; ENABLED-NEXT:    [[A0:%.*]] = load double, ptr [[AARRAY]], align 8
+; ENABLED-NEXT:    [[A1:%.*]] = load double, ptr [[IDXA1]], align 8
+; ENABLED-NEXT:    [[B0:%.*]] = load double, ptr [[BARRAY]], align 8
+; ENABLED-NEXT:    [[B1:%.*]] = load double, ptr [[IDXB1]], align 8
+; ENABLED-NEXT:    [[C0:%.*]] = load double, ptr [[CARRAY]], align 8
+; ENABLED-NEXT:    [[C1:%.*]] = load double, ptr [[IDXC1]], align 8
 ; ENABLED-NEXT:    [[SUBA0B0:%.*]] = fsub fast double [[A0]], [[B0]]
 ; ENABLED-NEXT:    [[ADDB1C1:%.*]] = fadd fast double [[B1]], [[C1]]
 ; ENABLED-NEXT:    [[SUB0:%.*]] = fsub fast double [[SUBA0B0]], [[C0]]
 ; ENABLED-NEXT:    [[ADD1:%.*]] = fadd fast double [[ADDB1C1]], [[A1]]
-; ENABLED-NEXT:    store double [[SUB0]], double* [[IDXS0]], align 8
-; ENABLED-NEXT:    store double [[ADD1]], double* [[IDXS1]], align 8
+; ENABLED-NEXT:    store double [[SUB0]], ptr [[SARRAY]], align 8
+; ENABLED-NEXT:    store double [[ADD1]], ptr [[IDXS1]], align 8
 ; ENABLED-NEXT:    ret void
 ;
 entry:
-  %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
-  %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
-  %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
-  %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
-  %idxC0 = getelementptr inbounds double, double* %Carray, i64 0
-  %idxC1 = getelementptr inbounds double, double* %Carray, i64 1
-  %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
-  %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1
+  %idxA1 = getelementptr inbounds double, ptr %Aarray, i64 1
+  %idxB1 = getelementptr inbounds double, ptr %Barray, i64 1
+  %idxC1 = getelementptr inbounds double, ptr %Carray, i64 1
+  %idxS1 = getelementptr inbounds double, ptr %Sarray, i64 1
 
-  %A0 = load double, double *%idxA0, align 8
-  %A1 = load double, double *%idxA1, align 8
+  %A0 = load double, ptr %Aarray, align 8
+  %A1 = load double, ptr %idxA1, align 8
 
-  %B0 = load double, double *%idxB0, align 8
-  %B1 = load double, double *%idxB1, align 8
+  %B0 = load double, ptr %Barray, align 8
+  %B1 = load double, ptr %idxB1, align 8
 
-  %C0 = load double, double *%idxC0, align 8
-  %C1 = load double, double *%idxC1, align 8
+  %C0 = load double, ptr %Carray, align 8
+  %C1 = load double, ptr %idxC1, align 8
 
   %subA0B0 = fsub fast double %A0, %B0
   %addB1C1 = fadd fast double %B1, %C1
   %sub0 = fsub fast double %subA0B0, %C0
   %add1 = fadd fast double %addB1C1, %A1
-  store double %sub0, double *%idxS0, align 8
-  store double %add1, double *%idxS1, align 8
+  store double %sub0, ptr %Sarray, align 8
+  store double %add1, ptr %idxS1, align 8
   ret void
 }
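
In this test the two root operations differ (fsub in lane 0, fadd in lane 1),
so a vector version would need an alternate-opcode node, i.e. computing both
operations and blending the lanes; as the CHECK lines above show, SLP keeps
this test scalar instead. A sketch of that alternate-opcode shape (assumed
and illustrative, not what the pass emits here):

define <2 x double> @sketch_altshuffle(<2 x double> %x, <2 x double> %y) {
  %sub = fsub fast <2 x double> %x, %y
  %add = fadd fast <2 x double> %x, %y
  %r = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %r
}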
 
@@ -200,55 +172,43 @@ entry:
 ; t3 = A1 + D
 ; But D is defined *after* its use.
 ;
-define void @supernode_scheduling(double* %Aarray, double* %Barray, double *%Carray, double *%Darray, double *%Sarray) {
+define void @supernode_scheduling(ptr %Aarray, ptr %Barray, ptr %Carray, ptr %Darray, ptr %Sarray) {
 ; ENABLED-LABEL: @supernode_scheduling(
 ; ENABLED-NEXT:  entry:
-; ENABLED-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
-; ENABLED-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
-; ENABLED-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1
-; ENABLED-NEXT:    [[IDXC:%.*]] = getelementptr inbounds double, double* [[CARRAY:%.*]], i64 0
-; ENABLED-NEXT:    [[IDXD:%.*]] = getelementptr inbounds double, double* [[DARRAY:%.*]], i64 0
-; ENABLED-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
-; ENABLED-NEXT:    [[C:%.*]] = load double, double* [[IDXC]], align 8
-; ENABLED-NEXT:    [[B0:%.*]] = load double, double* [[IDXB0]], align 8
-; ENABLED-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDXA0]] to <2 x double>*
-; ENABLED-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; ENABLED-NEXT:    [[B1:%.*]] = load double, double* [[IDXB1]], align 8
+; ENABLED-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, ptr [[BARRAY:%.*]], i64 1
+; ENABLED-NEXT:    [[C:%.*]] = load double, ptr [[CARRAY:%.*]], align 8
+; ENABLED-NEXT:    [[B0:%.*]] = load double, ptr [[BARRAY]], align 8
+; ENABLED-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[AARRAY:%.*]], align 8
+; ENABLED-NEXT:    [[B1:%.*]] = load double, ptr [[IDXB1]], align 8
 ; ENABLED-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0
 ; ENABLED-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B1]], i32 1
 ; ENABLED-NEXT:    [[TMP4:%.*]] = fadd fast <2 x double> [[TMP1]], [[TMP3]]
-; ENABLED-NEXT:    [[D:%.*]] = load double, double* [[IDXD]], align 8
+; ENABLED-NEXT:    [[D:%.*]] = load double, ptr [[DARRAY:%.*]], align 8
 ; ENABLED-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0
 ; ENABLED-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[D]], i32 1
 ; ENABLED-NEXT:    [[TMP7:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP6]]
-; ENABLED-NEXT:    [[TMP8:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
-; ENABLED-NEXT:    store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8
+; ENABLED-NEXT:    store <2 x double> [[TMP7]], ptr [[SARRAY:%.*]], align 8
 ; ENABLED-NEXT:    ret void
 ;
 entry:
-  %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
-  %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
-  %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
-  %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
-  %idxC = getelementptr inbounds double, double* %Carray, i64 0
-  %idxD = getelementptr inbounds double, double* %Darray, i64 0
-  %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
-  %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1
+  %idxA1 = getelementptr inbounds double, ptr %Aarray, i64 1
+  %idxB1 = getelementptr inbounds double, ptr %Barray, i64 1
+  %idxS1 = getelementptr inbounds double, ptr %Sarray, i64 1
 
 
-  %A0 = load double, double *%idxA0, align 8
-  %C = load double, double *%idxC, align 8
+  %A0 = load double, ptr %Aarray, align 8
+  %C = load double, ptr %Carray, align 8
   %t1 = fadd fast double %A0, %C
-  %B0 = load double, double *%idxB0, align 8
+  %B0 = load double, ptr %Barray, align 8
   %t2 = fadd fast double %t1, %B0
-  %A1 = load double, double *%idxA1, align 8
-  %B1 = load double, double *%idxB1, align 8
+  %A1 = load double, ptr %idxA1, align 8
+  %B1 = load double, ptr %idxB1, align 8
   %t3 = fadd fast double %A1, %B1
-  %D = load double, double *%idxD, align 8
+  %D = load double, ptr %Darray, align 8
   %t4 = fadd fast double %t3, %D
 
-  store double %t2, double *%idxS0, align 8
-  store double %t4, double *%idxS1, align 8
+  store double %t2, ptr %Sarray, align 8
+  store double %t4, ptr %idxS1, align 8
   ret void
 }
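
The scheduling constraint described in the comment above is visible in the
ENABLED output: the scalar load of D, which in the input appears after the
additions feeding its lane, is scheduled before the vector fadd that consumes
it. A reduced sketch of that reordered core (illustrative names, not part of
the patch):

define <2 x double> @sketch_sched(ptr %Darray, double %b0, <2 x double> %sum0) {
  %d = load double, ptr %Darray, align 8   ; hoisted before its vector use
  %v0 = insertelement <2 x double> poison, double %b0, i32 0
  %v1 = insertelement <2 x double> %v0, double %d, i32 1
  %r = fadd fast <2 x double> %sum0, %v1
  ret <2 x double> %r
}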
 
@@ -269,52 +229,45 @@ entry:
 ;  B0 = ...
 ;  S[0] = Tmp0 + B0
 ;  S[1] = Tmp1 + A1
-define void @supernode_scheduling_cross_block(double* %Aarray, double* %Barray, double *%Sarray) {
+define void @supernode_scheduling_cross_block(ptr %Aarray, ptr %Barray, ptr %Sarray) {
 ; ENABLED-LABEL: @supernode_scheduling_cross_block(
 ; ENABLED-NEXT:  entry:
-; ENABLED-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
-; ENABLED-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1
-; ENABLED-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
-; ENABLED-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1
-; ENABLED-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
-; ENABLED-NEXT:    [[A0:%.*]] = load double, double* [[IDXA0]], align 8
-; ENABLED-NEXT:    [[B1:%.*]] = load double, double* [[IDXB1]], align 8
+; ENABLED-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, ptr [[AARRAY:%.*]], i64 1
+; ENABLED-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, ptr [[BARRAY:%.*]], i64 1
+; ENABLED-NEXT:    [[A0:%.*]] = load double, ptr [[AARRAY]], align 8
+; ENABLED-NEXT:    [[B1:%.*]] = load double, ptr [[IDXB1]], align 8
 ; ENABLED-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
 ; ENABLED-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[B1]], i32 1
 ; ENABLED-NEXT:    [[TMP2:%.*]] = fadd fast <2 x double> [[TMP1]], <double 2.000000e+00, double 2.000000e+00>
 ; ENABLED-NEXT:    br label [[BB:%.*]]
 ; ENABLED:       bb:
-; ENABLED-NEXT:    [[A1:%.*]] = load double, double* [[IDXA1]], align 8
-; ENABLED-NEXT:    [[B0:%.*]] = load double, double* [[IDXB0]], align 8
+; ENABLED-NEXT:    [[A1:%.*]] = load double, ptr [[IDXA1]], align 8
+; ENABLED-NEXT:    [[B0:%.*]] = load double, ptr [[BARRAY]], align 8
 ; ENABLED-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0
 ; ENABLED-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[A1]], i32 1
 ; ENABLED-NEXT:    [[TMP5:%.*]] = fadd fast <2 x double> [[TMP2]], [[TMP4]]
-; ENABLED-NEXT:    [[TMP6:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
-; ENABLED-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
+; ENABLED-NEXT:    store <2 x double> [[TMP5]], ptr [[SARRAY:%.*]], align 8
 ; ENABLED-NEXT:    ret void
 ;
 entry:
-  %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
-  %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
-  %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
-  %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
-  %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
-  %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1
+  %idxA1 = getelementptr inbounds double, ptr %Aarray, i64 1
+  %idxB1 = getelementptr inbounds double, ptr %Barray, i64 1
+  %idxS1 = getelementptr inbounds double, ptr %Sarray, i64 1
 
-  %A0 = load double, double *%idxA0, align 8
-  %B1 = load double, double *%idxB1, align 8
+  %A0 = load double, ptr %Aarray, align 8
+  %B1 = load double, ptr %idxB1, align 8
   %Tmp0 = fadd fast double %A0, 2.0
   %Tmp1 = fadd fast double %B1, 2.0
 br label %bb
 
 bb:
-  %A1 = load double, double *%idxA1, align 8
-  %B0 = load double, double *%idxB0, align 8
+  %A1 = load double, ptr %idxA1, align 8
+  %B0 = load double, ptr %Barray, align 8
 
   %Sum0 = fadd fast double %Tmp0, %B0
   %Sum1 = fadd fast double %Tmp1, %A1
 
-  store double %Sum0, double *%idxS0, align 8
-  store double %Sum1, double *%idxS1, align 8
+  store double %Sum0, ptr %Sarray, align 8
+  store double %Sum1, ptr %idxS1, align 8
   ret void
 }
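
Here the tree spans two basic blocks, so the partial sums are materialized as
a vector in %entry and consumed in %bb. A reduced sketch of that split
(illustrative names, not part of the patch):

define void @sketch_cross_block(ptr %A, ptr %S, double %b0, double %a1) {
entry:
  %av = load <2 x double>, ptr %A, align 8
  %partial = fadd fast <2 x double> %av, <double 2.000000e+00, double 2.000000e+00>
  br label %bb

bb:
  %v0 = insertelement <2 x double> poison, double %b0, i32 0
  %v1 = insertelement <2 x double> %v0, double %a1, i32 1
  %sum = fadd fast <2 x double> %partial, %v1
  store <2 x double> %sum, ptr %S, align 8
  ret void
}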

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
index 795107c825107..8559021b49950 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
@@ -1,21 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
 
-define void @tiny_tree_fully_vectorizable(double* noalias nocapture %dst, double* noalias nocapture readonly %src, i64 %count) #0 {
+define void @tiny_tree_fully_vectorizable(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %count) #0 {
 ; CHECK-LABEL: @tiny_tree_fully_vectorizable(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP12:%.*]] = icmp eq i64 [[COUNT:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP12]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_015:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[DST_ADDR_014:%.*]] = phi double* [ [[ADD_PTR4:%.*]], [[FOR_BODY]] ], [ [[DST:%.*]], [[ENTRY]] ]
-; CHECK-NEXT:    [[SRC_ADDR_013:%.*]] = phi double* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[SRC:%.*]], [[ENTRY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[SRC_ADDR_013]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[DST_ADDR_014]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP1]], <2 x double>* [[TMP2]], align 8
-; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds double, double* [[SRC_ADDR_013]], i64 [[I_015]]
-; CHECK-NEXT:    [[ADD_PTR4]] = getelementptr inbounds double, double* [[DST_ADDR_014]], i64 [[I_015]]
+; CHECK-NEXT:    [[DST_ADDR_014:%.*]] = phi ptr [ [[ADD_PTR4:%.*]], [[FOR_BODY]] ], [ [[DST:%.*]], [[ENTRY]] ]
+; CHECK-NEXT:    [[SRC_ADDR_013:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[SRC:%.*]], [[ENTRY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[SRC_ADDR_013]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP1]], ptr [[DST_ADDR_014]], align 8
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds double, ptr [[SRC_ADDR_013]], i64 [[I_015]]
+; CHECK-NEXT:    [[ADD_PTR4]] = getelementptr inbounds double, ptr [[DST_ADDR_014]], i64 [[I_015]]
 ; CHECK-NEXT:    [[INC]] = add i64 [[I_015]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[COUNT]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]]
@@ -28,16 +26,16 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.015 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
-  %dst.addr.014 = phi double* [ %add.ptr4, %for.body ], [ %dst, %entry ]
-  %src.addr.013 = phi double* [ %add.ptr, %for.body ], [ %src, %entry ]
-  %0 = load double, double* %src.addr.013, align 8
-  store double %0, double* %dst.addr.014, align 8
-  %arrayidx2 = getelementptr inbounds double, double* %src.addr.013, i64 1
-  %1 = load double, double* %arrayidx2, align 8
-  %arrayidx3 = getelementptr inbounds double, double* %dst.addr.014, i64 1
-  store double %1, double* %arrayidx3, align 8
-  %add.ptr = getelementptr inbounds double, double* %src.addr.013, i64 %i.015
-  %add.ptr4 = getelementptr inbounds double, double* %dst.addr.014, i64 %i.015
+  %dst.addr.014 = phi ptr [ %add.ptr4, %for.body ], [ %dst, %entry ]
+  %src.addr.013 = phi ptr [ %add.ptr, %for.body ], [ %src, %entry ]
+  %0 = load double, ptr %src.addr.013, align 8
+  store double %0, ptr %dst.addr.014, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %src.addr.013, i64 1
+  %1 = load double, ptr %arrayidx2, align 8
+  %arrayidx3 = getelementptr inbounds double, ptr %dst.addr.014, i64 1
+  store double %1, ptr %arrayidx3, align 8
+  %add.ptr = getelementptr inbounds double, ptr %src.addr.013, i64 %i.015
+  %add.ptr4 = getelementptr inbounds double, ptr %dst.addr.014, i64 %i.015
   %inc = add i64 %i.015, 1
   %exitcond = icmp eq i64 %inc, %count
   br i1 %exitcond, label %for.end, label %for.body
@@ -46,21 +44,19 @@ for.end:                                          ; preds = %for.body, %entry
   ret void
 }
 
-define void @tiny_tree_fully_vectorizable2(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %count) #0 {
+define void @tiny_tree_fully_vectorizable2(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %count) #0 {
 ; CHECK-LABEL: @tiny_tree_fully_vectorizable2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP20:%.*]] = icmp eq i64 [[COUNT:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP20]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_023:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[DST_ADDR_022:%.*]] = phi float* [ [[ADD_PTR8:%.*]], [[FOR_BODY]] ], [ [[DST:%.*]], [[ENTRY]] ]
-; CHECK-NEXT:    [[SRC_ADDR_021:%.*]] = phi float* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[SRC:%.*]], [[ENTRY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[SRC_ADDR_021]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[DST_ADDR_022]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float>* [[TMP2]], align 4
-; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 [[I_023]]
-; CHECK-NEXT:    [[ADD_PTR8]] = getelementptr inbounds float, float* [[DST_ADDR_022]], i64 [[I_023]]
+; CHECK-NEXT:    [[DST_ADDR_022:%.*]] = phi ptr [ [[ADD_PTR8:%.*]], [[FOR_BODY]] ], [ [[DST:%.*]], [[ENTRY]] ]
+; CHECK-NEXT:    [[SRC_ADDR_021:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[SRC:%.*]], [[ENTRY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[SRC_ADDR_021]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[DST_ADDR_022]], align 4
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds float, ptr [[SRC_ADDR_021]], i64 [[I_023]]
+; CHECK-NEXT:    [[ADD_PTR8]] = getelementptr inbounds float, ptr [[DST_ADDR_022]], i64 [[I_023]]
 ; CHECK-NEXT:    [[INC]] = add i64 [[I_023]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[COUNT]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]]
@@ -73,24 +69,24 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.023 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
-  %dst.addr.022 = phi float* [ %add.ptr8, %for.body ], [ %dst, %entry ]
-  %src.addr.021 = phi float* [ %add.ptr, %for.body ], [ %src, %entry ]
-  %0 = load float, float* %src.addr.021, align 4
-  store float %0, float* %dst.addr.022, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %src.addr.021, i64 1
-  %1 = load float, float* %arrayidx2, align 4
-  %arrayidx3 = getelementptr inbounds float, float* %dst.addr.022, i64 1
-  store float %1, float* %arrayidx3, align 4
-  %arrayidx4 = getelementptr inbounds float, float* %src.addr.021, i64 2
-  %2 = load float, float* %arrayidx4, align 4
-  %arrayidx5 = getelementptr inbounds float, float* %dst.addr.022, i64 2
-  store float %2, float* %arrayidx5, align 4
-  %arrayidx6 = getelementptr inbounds float, float* %src.addr.021, i64 3
-  %3 = load float, float* %arrayidx6, align 4
-  %arrayidx7 = getelementptr inbounds float, float* %dst.addr.022, i64 3
-  store float %3, float* %arrayidx7, align 4
-  %add.ptr = getelementptr inbounds float, float* %src.addr.021, i64 %i.023
-  %add.ptr8 = getelementptr inbounds float, float* %dst.addr.022, i64 %i.023
+  %dst.addr.022 = phi ptr [ %add.ptr8, %for.body ], [ %dst, %entry ]
+  %src.addr.021 = phi ptr [ %add.ptr, %for.body ], [ %src, %entry ]
+  %0 = load float, ptr %src.addr.021, align 4
+  store float %0, ptr %dst.addr.022, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %src.addr.021, i64 1
+  %1 = load float, ptr %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds float, ptr %dst.addr.022, i64 1
+  store float %1, ptr %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds float, ptr %src.addr.021, i64 2
+  %2 = load float, ptr %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %dst.addr.022, i64 2
+  store float %2, ptr %arrayidx5, align 4
+  %arrayidx6 = getelementptr inbounds float, ptr %src.addr.021, i64 3
+  %3 = load float, ptr %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds float, ptr %dst.addr.022, i64 3
+  store float %3, ptr %arrayidx7, align 4
+  %add.ptr = getelementptr inbounds float, ptr %src.addr.021, i64 %i.023
+  %add.ptr8 = getelementptr inbounds float, ptr %dst.addr.022, i64 %i.023
   %inc = add i64 %i.023, 1
   %exitcond = icmp eq i64 %inc, %count
   br i1 %exitcond, label %for.end, label %for.body
@@ -101,23 +97,23 @@ for.end:                                          ; preds = %for.body, %entry
 
 ; We do not vectorize the tiny tree which is not fully vectorizable.
 
-define void @tiny_tree_not_fully_vectorizable(double* noalias nocapture %dst, double* noalias nocapture readonly %src, i64 %count) #0 {
+define void @tiny_tree_not_fully_vectorizable(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %count) #0 {
 ; CHECK-LABEL: @tiny_tree_not_fully_vectorizable(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP12:%.*]] = icmp eq i64 [[COUNT:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP12]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_015:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[DST_ADDR_014:%.*]] = phi double* [ [[ADD_PTR4:%.*]], [[FOR_BODY]] ], [ [[DST:%.*]], [[ENTRY]] ]
-; CHECK-NEXT:    [[SRC_ADDR_013:%.*]] = phi double* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[SRC:%.*]], [[ENTRY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = load double, double* [[SRC_ADDR_013]], align 8
-; CHECK-NEXT:    store double [[TMP0]], double* [[DST_ADDR_014]], align 8
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[SRC_ADDR_013]], i64 2
-; CHECK-NEXT:    [[TMP1:%.*]] = load double, double* [[ARRAYIDX2]], align 8
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[DST_ADDR_014]], i64 1
-; CHECK-NEXT:    store double [[TMP1]], double* [[ARRAYIDX3]], align 8
-; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds double, double* [[SRC_ADDR_013]], i64 [[I_015]]
-; CHECK-NEXT:    [[ADD_PTR4]] = getelementptr inbounds double, double* [[DST_ADDR_014]], i64 [[I_015]]
+; CHECK-NEXT:    [[DST_ADDR_014:%.*]] = phi ptr [ [[ADD_PTR4:%.*]], [[FOR_BODY]] ], [ [[DST:%.*]], [[ENTRY]] ]
+; CHECK-NEXT:    [[SRC_ADDR_013:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[SRC:%.*]], [[ENTRY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[SRC_ADDR_013]], align 8
+; CHECK-NEXT:    store double [[TMP0]], ptr [[DST_ADDR_014]], align 8
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[SRC_ADDR_013]], i64 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[DST_ADDR_014]], i64 1
+; CHECK-NEXT:    store double [[TMP1]], ptr [[ARRAYIDX3]], align 8
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds double, ptr [[SRC_ADDR_013]], i64 [[I_015]]
+; CHECK-NEXT:    [[ADD_PTR4]] = getelementptr inbounds double, ptr [[DST_ADDR_014]], i64 [[I_015]]
 ; CHECK-NEXT:    [[INC]] = add i64 [[I_015]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[COUNT]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]]
@@ -130,16 +126,16 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.015 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
-  %dst.addr.014 = phi double* [ %add.ptr4, %for.body ], [ %dst, %entry ]
-  %src.addr.013 = phi double* [ %add.ptr, %for.body ], [ %src, %entry ]
-  %0 = load double, double* %src.addr.013, align 8
-  store double %0, double* %dst.addr.014, align 8
-  %arrayidx2 = getelementptr inbounds double, double* %src.addr.013, i64 2
-  %1 = load double, double* %arrayidx2, align 8
-  %arrayidx3 = getelementptr inbounds double, double* %dst.addr.014, i64 1
-  store double %1, double* %arrayidx3, align 8
-  %add.ptr = getelementptr inbounds double, double* %src.addr.013, i64 %i.015
-  %add.ptr4 = getelementptr inbounds double, double* %dst.addr.014, i64 %i.015
+  %dst.addr.014 = phi ptr [ %add.ptr4, %for.body ], [ %dst, %entry ]
+  %src.addr.013 = phi ptr [ %add.ptr, %for.body ], [ %src, %entry ]
+  %0 = load double, ptr %src.addr.013, align 8
+  store double %0, ptr %dst.addr.014, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %src.addr.013, i64 2
+  %1 = load double, ptr %arrayidx2, align 8
+  %arrayidx3 = getelementptr inbounds double, ptr %dst.addr.014, i64 1
+  store double %1, ptr %arrayidx3, align 8
+  %add.ptr = getelementptr inbounds double, ptr %src.addr.013, i64 %i.015
+  %add.ptr4 = getelementptr inbounds double, ptr %dst.addr.014, i64 %i.015
   %inc = add i64 %i.015, 1
   %exitcond = icmp eq i64 %inc, %count
   br i1 %exitcond, label %for.end, label %for.body
@@ -148,29 +144,27 @@ for.end:                                          ; preds = %for.body, %entry
   ret void
 }
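
This tree is "not fully vectorizable" because the two source loads sit at
offsets 0 and 2 and therefore cannot become one consecutive <2 x double>
load; the only vector form would gather two scalar loads just to feed the
store, which (per the test's comment) SLP declines for a tree this small.
A sketch of that rejected shape (illustrative names, not part of the patch):

define void @sketch_gather_store(ptr %src, ptr %dst) {
  %p2 = getelementptr inbounds double, ptr %src, i64 2
  %x0 = load double, ptr %src, align 8
  %x2 = load double, ptr %p2, align 8
  %v0 = insertelement <2 x double> poison, double %x0, i32 0
  %v1 = insertelement <2 x double> %v0, double %x2, i32 1
  store <2 x double> %v1, ptr %dst, align 8
  ret void
}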
 
-define void @tiny_tree_not_fully_vectorizable2(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %count) #0 {
+define void @tiny_tree_not_fully_vectorizable2(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %count) #0 {
 ; CHECK-LABEL: @tiny_tree_not_fully_vectorizable2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP20:%.*]] = icmp eq i64 [[COUNT:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP20]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_023:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[DST_ADDR_022:%.*]] = phi float* [ [[ADD_PTR8:%.*]], [[FOR_BODY]] ], [ [[DST:%.*]], [[ENTRY]] ]
-; CHECK-NEXT:    [[SRC_ADDR_021:%.*]] = phi float* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[SRC:%.*]], [[ENTRY]] ]
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 4
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 2
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[SRC_ADDR_021]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX4]] to <2 x float>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[DST_ADDR_022:%.*]] = phi ptr [ [[ADD_PTR8:%.*]], [[FOR_BODY]] ], [ [[DST:%.*]], [[ENTRY]] ]
+; CHECK-NEXT:    [[SRC_ADDR_021:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[SRC:%.*]], [[ENTRY]] ]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[SRC_ADDR_021]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[SRC_ADDR_021]], i64 2
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[SRC_ADDR_021]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP1]], i32 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast float* [[DST_ADDR_022]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP7]], <4 x float>* [[TMP8]], align 4
-; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 [[I_023]]
-; CHECK-NEXT:    [[ADD_PTR8]] = getelementptr inbounds float, float* [[DST_ADDR_022]], i64 [[I_023]]
+; CHECK-NEXT:    store <4 x float> [[TMP7]], ptr [[DST_ADDR_022]], align 4
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds float, ptr [[SRC_ADDR_021]], i64 [[I_023]]
+; CHECK-NEXT:    [[ADD_PTR8]] = getelementptr inbounds float, ptr [[DST_ADDR_022]], i64 [[I_023]]
 ; CHECK-NEXT:    [[INC]] = add i64 [[I_023]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[COUNT]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]]
@@ -183,24 +177,24 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.023 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
-  %dst.addr.022 = phi float* [ %add.ptr8, %for.body ], [ %dst, %entry ]
-  %src.addr.021 = phi float* [ %add.ptr, %for.body ], [ %src, %entry ]
-  %0 = load float, float* %src.addr.021, align 4
-  store float %0, float* %dst.addr.022, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %src.addr.021, i64 4
-  %1 = load float, float* %arrayidx2, align 4
-  %arrayidx3 = getelementptr inbounds float, float* %dst.addr.022, i64 1
-  store float %1, float* %arrayidx3, align 4
-  %arrayidx4 = getelementptr inbounds float, float* %src.addr.021, i64 2
-  %2 = load float, float* %arrayidx4, align 4
-  %arrayidx5 = getelementptr inbounds float, float* %dst.addr.022, i64 2
-  store float %2, float* %arrayidx5, align 4
-  %arrayidx6 = getelementptr inbounds float, float* %src.addr.021, i64 3
-  %3 = load float, float* %arrayidx6, align 4
-  %arrayidx7 = getelementptr inbounds float, float* %dst.addr.022, i64 3
-  store float %3, float* %arrayidx7, align 4
-  %add.ptr = getelementptr inbounds float, float* %src.addr.021, i64 %i.023
-  %add.ptr8 = getelementptr inbounds float, float* %dst.addr.022, i64 %i.023
+  %dst.addr.022 = phi ptr [ %add.ptr8, %for.body ], [ %dst, %entry ]
+  %src.addr.021 = phi ptr [ %add.ptr, %for.body ], [ %src, %entry ]
+  %0 = load float, ptr %src.addr.021, align 4
+  store float %0, ptr %dst.addr.022, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %src.addr.021, i64 4
+  %1 = load float, ptr %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds float, ptr %dst.addr.022, i64 1
+  store float %1, ptr %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds float, ptr %src.addr.021, i64 2
+  %2 = load float, ptr %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %dst.addr.022, i64 2
+  store float %2, ptr %arrayidx5, align 4
+  %arrayidx6 = getelementptr inbounds float, ptr %src.addr.021, i64 3
+  %3 = load float, ptr %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds float, ptr %dst.addr.022, i64 3
+  store float %3, ptr %arrayidx7, align 4
+  %add.ptr = getelementptr inbounds float, ptr %src.addr.021, i64 %i.023
+  %add.ptr8 = getelementptr inbounds float, ptr %dst.addr.022, i64 %i.023
   %inc = add i64 %i.023, 1
   %exitcond = icmp eq i64 %inc, %count
   br i1 %exitcond, label %for.end, label %for.body
@@ -209,118 +203,107 @@ for.end:                                          ; preds = %for.body, %entry
   ret void
 }
 
-define void @store_splat(float*, float) {
+define void @store_splat(ptr, float) {
 ; CHECK-LABEL: @store_splat(
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP0:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast float* [[TMP3]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[SHUFFLE]], <4 x float>* [[TMP5]], align 4
+; CHECK-NEXT:    store <4 x float> [[SHUFFLE]], ptr [[TMP0:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %3 = getelementptr inbounds float, float* %0, i64 0
-  store float %1, float* %3, align 4
-  %4 = getelementptr inbounds float, float* %0, i64 1
-  store float %1, float* %4, align 4
-  %5 = getelementptr inbounds float, float* %0, i64 2
-  store float %1, float* %5, align 4
-  %6 = getelementptr inbounds float, float* %0, i64 3
-  store float %1, float* %6, align 4
+  store float %1, ptr %0, align 4
+  %3 = getelementptr inbounds float, ptr %0, i64 1
+  store float %1, ptr %3, align 4
+  %4 = getelementptr inbounds float, ptr %0, i64 2
+  store float %1, ptr %4, align 4
+  %5 = getelementptr inbounds float, ptr %0, i64 3
+  store float %1, ptr %5, align 4
   ret void
 }
 
-define void @store_const(i32* %a) {
+define void @store_const(ptr %a) {
 ; CHECK-LABEL: @store_const(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 0
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[PTR0]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> <i32 10, i32 30, i32 20, i32 40>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    store <4 x i32> <i32 10, i32 30, i32 20, i32 40>, ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %ptr0 = getelementptr inbounds i32, i32* %a, i64 0
-  store i32 10, i32* %ptr0, align 4
-  %ptr1 = getelementptr inbounds i32, i32* %a, i64 1
-  store i32 30, i32* %ptr1, align 4
-  %ptr2 = getelementptr inbounds i32, i32* %a, i64 2
-  store i32 20, i32* %ptr2, align 4
-  %ptr3 = getelementptr inbounds i32, i32* %a, i64 3
-  store i32 40, i32* %ptr3, align 4
+  store i32 10, ptr %a, align 4
+  %ptr1 = getelementptr inbounds i32, ptr %a, i64 1
+  store i32 30, ptr %ptr1, align 4
+  %ptr2 = getelementptr inbounds i32, ptr %a, i64 2
+  store i32 20, ptr %ptr2, align 4
+  %ptr3 = getelementptr inbounds i32, ptr %a, i64 3
+  store i32 40, ptr %ptr3, align 4
   ret void
 }
 
-define void @tiny_vector_gather(i32 *%a, i32 *%v1, i32 *%v2) {
+define void @tiny_vector_gather(ptr %a, ptr %v1, ptr %v2) {
 ; CHECK-LABEL: @tiny_vector_gather(
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[V1:%.*]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[V2:%.*]], align 4
-; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 0
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[PTR0]], align 16
-; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1
-; CHECK-NEXT:    store i32 [[TMP2]], i32* [[PTR1]], align 4
-; CHECK-NEXT:    [[PTR2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[PTR2]], align 8
-; CHECK-NEXT:    [[PTR3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
-; CHECK-NEXT:    store i32 [[TMP2]], i32* [[PTR3]], align 4
-; CHECK-NEXT:    [[PTR4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[PTR4]], align 16
-; CHECK-NEXT:    [[PTR5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 5
-; CHECK-NEXT:    store i32 [[TMP2]], i32* [[PTR5]], align 4
-; CHECK-NEXT:    [[PTR6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 6
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[PTR6]], align 8
-; CHECK-NEXT:    [[PTR7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 7
-; CHECK-NEXT:    store i32 [[TMP2]], i32* [[PTR7]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[V1:%.*]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[V2:%.*]], align 4
+; CHECK-NEXT:    store i32 [[TMP1]], ptr [[A:%.*]], align 16
+; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 1
+; CHECK-NEXT:    store i32 [[TMP2]], ptr [[PTR1]], align 4
+; CHECK-NEXT:    [[PTR2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 2
+; CHECK-NEXT:    store i32 [[TMP1]], ptr [[PTR2]], align 8
+; CHECK-NEXT:    [[PTR3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 3
+; CHECK-NEXT:    store i32 [[TMP2]], ptr [[PTR3]], align 4
+; CHECK-NEXT:    [[PTR4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 4
+; CHECK-NEXT:    store i32 [[TMP1]], ptr [[PTR4]], align 16
+; CHECK-NEXT:    [[PTR5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 5
+; CHECK-NEXT:    store i32 [[TMP2]], ptr [[PTR5]], align 4
+; CHECK-NEXT:    [[PTR6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 6
+; CHECK-NEXT:    store i32 [[TMP1]], ptr [[PTR6]], align 8
+; CHECK-NEXT:    [[PTR7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 7
+; CHECK-NEXT:    store i32 [[TMP2]], ptr [[PTR7]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %1 = load i32, i32* %v1, align 4
-  %2 = load i32, i32* %v2, align 4
-  %ptr0 = getelementptr inbounds i32, i32* %a, i64 0
-  store i32 %1, i32* %ptr0, align 16
-  %ptr1 = getelementptr inbounds i32, i32* %a, i64 1
-  store i32 %2, i32* %ptr1, align 4
-  %ptr2 = getelementptr inbounds i32, i32* %a, i64 2
-  store i32 %1, i32* %ptr2, align 8
-  %ptr3 = getelementptr inbounds i32, i32* %a, i64 3
-  store i32 %2, i32* %ptr3, align 4
-  %ptr4 = getelementptr inbounds i32, i32* %a, i64 4
-  store i32 %1, i32* %ptr4, align 16
-  %ptr5 = getelementptr inbounds i32, i32* %a, i64 5
-  store i32 %2, i32* %ptr5, align 4
-  %ptr6 = getelementptr inbounds i32, i32* %a, i64 6
-  store i32 %1, i32* %ptr6, align 8
-  %ptr7 = getelementptr inbounds i32, i32* %a, i64 7
-  store i32 %2, i32* %ptr7, align 4
+  %1 = load i32, ptr %v1, align 4
+  %2 = load i32, ptr %v2, align 4
+  store i32 %1, ptr %a, align 16
+  %ptr1 = getelementptr inbounds i32, ptr %a, i64 1
+  store i32 %2, ptr %ptr1, align 4
+  %ptr2 = getelementptr inbounds i32, ptr %a, i64 2
+  store i32 %1, ptr %ptr2, align 8
+  %ptr3 = getelementptr inbounds i32, ptr %a, i64 3
+  store i32 %2, ptr %ptr3, align 4
+  %ptr4 = getelementptr inbounds i32, ptr %a, i64 4
+  store i32 %1, ptr %ptr4, align 16
+  %ptr5 = getelementptr inbounds i32, ptr %a, i64 5
+  store i32 %2, ptr %ptr5, align 4
+  %ptr6 = getelementptr inbounds i32, ptr %a, i64 6
+  store i32 %1, ptr %ptr6, align 8
+  %ptr7 = getelementptr inbounds i32, ptr %a, i64 7
+  store i32 %2, ptr %ptr7, align 4
   ret void
 }
 
-define void @tiny_vector_with_diff_opcode(i16 *%a, i16 *%v1) {
+define void @tiny_vector_with_diff_opcode(ptr %a, ptr %v1) {
 ; CHECK-LABEL: @tiny_vector_with_diff_opcode(
-; CHECK-NEXT:    [[TMP1:%.*]] = load i16, i16* [[V1:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[V1:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 undef to i16
-; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr inbounds i16, i16* [[A:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i16> [[TMP3]], i16 [[TMP2]], i32 1
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16* [[PTR0]] to <8 x i16>*
-; CHECK-NEXT:    store <8 x i16> [[SHUFFLE]], <8 x i16>* [[TMP5]], align 16
+; CHECK-NEXT:    store <8 x i16> [[SHUFFLE]], ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
-  %1 = load i16, i16* %v1, align 4
+  %1 = load i16, ptr %v1, align 4
   %2 = trunc i64 undef to i16
-  %ptr0 = getelementptr inbounds i16, i16* %a, i64 0
-  store i16 %1, i16* %ptr0, align 16
-  %ptr1 = getelementptr inbounds i16, i16* %a, i64 1
-  store i16 %2, i16* %ptr1, align 4
-  %ptr2 = getelementptr inbounds i16, i16* %a, i64 2
-  store i16 %1, i16* %ptr2, align 8
-  %ptr3 = getelementptr inbounds i16, i16* %a, i64 3
-  store i16 %2, i16* %ptr3, align 4
-  %ptr4 = getelementptr inbounds i16, i16* %a, i64 4
-  store i16 %1, i16* %ptr4, align 16
-  %ptr5 = getelementptr inbounds i16, i16* %a, i64 5
-  store i16 %2, i16* %ptr5, align 4
-  %ptr6 = getelementptr inbounds i16, i16* %a, i64 6
-  store i16 %1, i16* %ptr6, align 8
-  %ptr7 = getelementptr inbounds i16, i16* %a, i64 7
-  store i16 %2, i16* %ptr7, align 4
+  store i16 %1, ptr %a, align 16
+  %ptr1 = getelementptr inbounds i16, ptr %a, i64 1
+  store i16 %2, ptr %ptr1, align 4
+  %ptr2 = getelementptr inbounds i16, ptr %a, i64 2
+  store i16 %1, ptr %ptr2, align 8
+  %ptr3 = getelementptr inbounds i16, ptr %a, i64 3
+  store i16 %2, ptr %ptr3, align 4
+  %ptr4 = getelementptr inbounds i16, ptr %a, i64 4
+  store i16 %1, ptr %ptr4, align 16
+  %ptr5 = getelementptr inbounds i16, ptr %a, i64 5
+  store i16 %2, ptr %ptr5, align 4
+  %ptr6 = getelementptr inbounds i16, ptr %a, i64 6
+  store i16 %1, ptr %ptr6, align 8
+  %ptr7 = getelementptr inbounds i16, ptr %a, i64 7
+  store i16 %2, ptr %ptr7, align 4
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll b/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll
index e4fa17cecaef5..18d175e04fb9e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll
@@ -22,90 +22,90 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 define void @uitofp_2i64_2f64() #0 {
 ; CHECK-LABEL: @uitofp_2i64_2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
 ; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x double>
-; CHECK-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+  %ld0 = load i64, ptr @src64, align 64
+  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
   %cvt0 = uitofp i64 %ld0 to double
   %cvt1 = uitofp i64 %ld1 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @uitofp_4i64_4f64() #0 {
 ; SSE-LABEL: @uitofp_4i64_4f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2) to <2 x i64>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
 ; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i64> [[TMP3]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @uitofp_4i64_4f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
 ; AVX-NEXT:    [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x double>
-; AVX-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
+; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-  %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-  %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
+  %ld0 = load i64, ptr @src64, align 64
+  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
+  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
+  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
   %cvt0 = uitofp i64 %ld0 to double
   %cvt1 = uitofp i64 %ld1 to double
   %cvt2 = uitofp i64 %ld2 to double
   %cvt3 = uitofp i64 %ld3 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
   ret void
 }
 
 define void @uitofp_8i64_8f64() #0 {
 ; SSE-LABEL: @uitofp_8i64_8f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2) to <2 x i64>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
 ; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i64> [[TMP3]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4) to <2 x i64>*), align 32
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
 ; SSE-NEXT:    [[TMP6:%.*]] = uitofp <2 x i64> [[TMP5]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 32
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6) to <2 x i64>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
 ; SSE-NEXT:    [[TMP8:%.*]] = uitofp <2 x i64> [[TMP7]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @uitofp_8i64_8f64(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
+; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
 ; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x double>
-; AVX256-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
-; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4) to <4 x i64>*), align 32
+; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
+; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
 ; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <4 x i64> [[TMP3]] to <4 x double>
-; AVX256-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
+; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @uitofp_8i64_8f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @src64, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <8 x i64> [[TMP1]] to <8 x double>
-; AVX512-NEXT:    store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
+; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-  %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-  %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
-  %ld4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
-  %ld5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
-  %ld6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
-  %ld7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
+  %ld0 = load i64, ptr @src64, align 64
+  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
+  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
+  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
+  %ld4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
+  %ld5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 5), align 8
+  %ld6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
+  %ld7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 7), align 8
   %cvt0 = uitofp i64 %ld0 to double
   %cvt1 = uitofp i64 %ld1 to double
   %cvt2 = uitofp i64 %ld2 to double
@@ -114,130 +114,130 @@ define void @uitofp_8i64_8f64() #0 {
   %cvt5 = uitofp i64 %ld5 to double
   %cvt6 = uitofp i64 %ld6 to double
   %cvt7 = uitofp i64 %ld7 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-  store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
-  store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-  store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
-  store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
+  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @uitofp_2i32_2f64() #0 {
 ; SSE-LABEL: @uitofp_2i32_2f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
 ; SSE-NEXT:    ret void
 ;
 ; AVX1-LABEL: @uitofp_2i32_2f64(
-; AVX1-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
-; AVX1-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
+; AVX1-NEXT:    [[LD0:%.*]] = load i32, ptr @src32, align 64
+; AVX1-NEXT:    [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
 ; AVX1-NEXT:    [[CVT0:%.*]] = uitofp i32 [[LD0]] to double
 ; AVX1-NEXT:    [[CVT1:%.*]] = uitofp i32 [[LD1]] to double
-; AVX1-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; AVX1-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; AVX1-NEXT:    store double [[CVT0]], ptr @dst64, align 64
+; AVX1-NEXT:    store double [[CVT1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @uitofp_2i32_2f64(
-; AVX2-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
+; AVX2-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
 ; AVX2-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
-; AVX2-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; AVX2-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @uitofp_2i32_2f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
-; AVX512-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; AVX512-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX512-NEXT:    ret void
 ;
 ; AVX256DQ-LABEL: @uitofp_2i32_2f64(
-; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
+; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
 ; AVX256DQ-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
-; AVX256DQ-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; AVX256DQ-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX256DQ-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
-  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
+  %ld0 = load i32, ptr @src32, align 64
+  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
   %cvt0 = uitofp i32 %ld0 to double
   %cvt1 = uitofp i32 %ld1 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @uitofp_4i32_4f64() #0 {
 ; SSE-LABEL: @uitofp_4i32_4f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2) to <2 x i32>*), align 8
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i32> [[TMP3]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @uitofp_4i32_4f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
 ; AVX-NEXT:    [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x double>
-; AVX-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
+; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
-  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
-  %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
-  %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
+  %ld0 = load i32, ptr @src32, align 64
+  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
+  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
   %cvt0 = uitofp i32 %ld0 to double
   %cvt1 = uitofp i32 %ld1 to double
   %cvt2 = uitofp i32 %ld2 to double
   %cvt3 = uitofp i32 %ld3 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
   ret void
 }
 
 define void @uitofp_8i32_8f64() #0 {
 ; SSE-LABEL: @uitofp_8i32_8f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @src32, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2) to <2 x i32>*), align 8
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
 ; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i32> [[TMP3]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <2 x i32>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
 ; SSE-NEXT:    [[TMP6:%.*]] = uitofp <2 x i32> [[TMP5]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 32
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6) to <2 x i32>*), align 8
+; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6), align 8
 ; SSE-NEXT:    [[TMP8:%.*]] = uitofp <2 x i32> [[TMP7]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @uitofp_8i32_8f64(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
+; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
 ; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x double>
-; AVX256-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
-; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 16
+; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
+; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
 ; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x double>
-; AVX256-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
+; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @uitofp_8i32_8f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @src32 to <8 x i32>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <8 x i32> [[TMP1]] to <8 x double>
-; AVX512-NEXT:    store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
+; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
-  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
-  %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
-  %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
-  %ld4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4), align 16
-  %ld5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5), align 4
-  %ld6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6), align 8
-  %ld7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7), align 4
+  %ld0 = load i32, ptr @src32, align 64
+  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
+  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
+  %ld4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
+  %ld5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 5), align 4
+  %ld6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6), align 8
+  %ld7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 7), align 4
   %cvt0 = uitofp i32 %ld0 to double
   %cvt1 = uitofp i32 %ld1 to double
   %cvt2 = uitofp i32 %ld2 to double
@@ -246,103 +246,103 @@ define void @uitofp_8i32_8f64() #0 {
   %cvt5 = uitofp i32 %ld5 to double
   %cvt6 = uitofp i32 %ld6 to double
   %cvt7 = uitofp i32 %ld7 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-  store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
-  store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-  store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
-  store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
+  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @uitofp_2i16_2f64() #0 {
 ; CHECK-LABEL: @uitofp_2i16_2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* bitcast ([32 x i16]* @src16 to <2 x i16>*), align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr @src16, align 64
 ; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <2 x i16> [[TMP1]] to <2 x double>
-; CHECK-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
-  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
+  %ld0 = load i16, ptr @src16, align 64
+  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
   %cvt0 = uitofp i16 %ld0 to double
   %cvt1 = uitofp i16 %ld1 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @uitofp_4i16_4f64() #0 {
 ; SSE-LABEL: @uitofp_4i16_4f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* bitcast ([32 x i16]* @src16 to <2 x i16>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr @src16, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i16> [[TMP1]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i16>, <2 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2) to <2 x i16>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i16> [[TMP3]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @uitofp_4i16_4f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
 ; AVX-NEXT:    [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x double>
-; AVX-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
+; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
-  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
-  %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
-  %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
+  %ld0 = load i16, ptr @src16, align 64
+  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
+  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
+  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
   %cvt0 = uitofp i16 %ld0 to double
   %cvt1 = uitofp i16 %ld1 to double
   %cvt2 = uitofp i16 %ld2 to double
   %cvt3 = uitofp i16 %ld3 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
   ret void
 }
 
 define void @uitofp_8i16_8f64() #0 {
 ; SSE-LABEL: @uitofp_8i16_8f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* bitcast ([32 x i16]* @src16 to <2 x i16>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr @src16, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i16> [[TMP1]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i16>, <2 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2) to <2 x i16>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i16> [[TMP3]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i16>, <2 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <2 x i16>*), align 8
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
 ; SSE-NEXT:    [[TMP6:%.*]] = uitofp <2 x i16> [[TMP5]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 32
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i16>, <2 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6) to <2 x i16>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6), align 4
 ; SSE-NEXT:    [[TMP8:%.*]] = uitofp <2 x i16> [[TMP7]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @uitofp_8i16_8f64(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
+; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
 ; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x double>
-; AVX256-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
-; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8
+; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
+; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
 ; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <4 x i16> [[TMP3]] to <4 x double>
-; AVX256-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
+; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @uitofp_8i16_8f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @src16 to <8 x i16>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <8 x i16> [[TMP1]] to <8 x double>
-; AVX512-NEXT:    store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
+; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
-  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
-  %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
-  %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
-  %ld4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
-  %ld5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
-  %ld6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
-  %ld7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
+  %ld0 = load i16, ptr @src16, align 64
+  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
+  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
+  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
+  %ld4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
+  %ld5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 5), align 2
+  %ld6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6), align 4
+  %ld7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 7), align 2
   %cvt0 = uitofp i16 %ld0 to double
   %cvt1 = uitofp i16 %ld1 to double
   %cvt2 = uitofp i16 %ld2 to double
@@ -351,103 +351,103 @@ define void @uitofp_8i16_8f64() #0 {
   %cvt5 = uitofp i16 %ld5 to double
   %cvt6 = uitofp i16 %ld6 to double
   %cvt7 = uitofp i16 %ld7 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-  store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
-  store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-  store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
-  store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
+  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
   ret void
 }
 
 define void @uitofp_2i8_2f64() #0 {
 ; CHECK-LABEL: @uitofp_2i8_2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr @src8, align 64
 ; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double>
-; CHECK-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
-  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
+  %ld0 = load i8, ptr @src8, align 64
+  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
   %cvt0 = uitofp i8 %ld0 to double
   %cvt1 = uitofp i8 %ld1 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
   ret void
 }
 
 define void @uitofp_4i8_4f64() #0 {
 ; SSE-LABEL: @uitofp_4i8_4f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr @src8, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2) to <2 x i8>*), align 2
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
 ; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i8> [[TMP3]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @uitofp_4i8_4f64(
-; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
 ; AVX-NEXT:    [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x double>
-; AVX-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
+; AVX-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
-  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
-  %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
-  %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
+  %ld0 = load i8, ptr @src8, align 64
+  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
+  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
+  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
   %cvt0 = uitofp i8 %ld0 to double
   %cvt1 = uitofp i8 %ld1 to double
   %cvt2 = uitofp i8 %ld2 to double
   %cvt3 = uitofp i8 %ld3 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
   ret void
 }
 
 define void @uitofp_8i8_8f64() #0 {
 ; SSE-LABEL: @uitofp_8i8_8f64(
-; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr @src8, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2) to <2 x i8>*), align 2
+; SSE-NEXT:    store <2 x double> [[TMP2]], ptr @dst64, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
 ; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i8> [[TMP3]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
-; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <2 x i8>*), align 4
+; SSE-NEXT:    store <2 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+; SSE-NEXT:    [[TMP5:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP6:%.*]] = uitofp <2 x i8> [[TMP5]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 32
-; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6) to <2 x i8>*), align 2
+; SSE-NEXT:    store <2 x double> [[TMP6]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+; SSE-NEXT:    [[TMP7:%.*]] = load <2 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6), align 2
 ; SSE-NEXT:    [[TMP8:%.*]] = uitofp <2 x i8> [[TMP7]] to <2 x double>
-; SSE-NEXT:    store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 16
+; SSE-NEXT:    store <2 x double> [[TMP8]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @uitofp_8i8_8f64(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
+; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
 ; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x double>
-; AVX256-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
-; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 4
+; AVX256-NEXT:    store <4 x double> [[TMP2]], ptr @dst64, align 64
+; AVX256-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
 ; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <4 x i8> [[TMP3]] to <4 x double>
-; AVX256-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
+; AVX256-NEXT:    store <4 x double> [[TMP4]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @uitofp_8i8_8f64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([64 x i8]* @src8 to <8 x i8>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @src8, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <8 x i8> [[TMP1]] to <8 x double>
-; AVX512-NEXT:    store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
+; AVX512-NEXT:    store <8 x double> [[TMP2]], ptr @dst64, align 64
 ; AVX512-NEXT:    ret void
 ;
-  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
-  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
-  %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
-  %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
-  %ld4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4), align 4
-  %ld5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5), align 1
-  %ld6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6), align 2
-  %ld7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7), align 1
+  %ld0 = load i8, ptr @src8, align 64
+  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
+  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
+  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
+  %ld4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
+  %ld5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 5), align 1
+  %ld6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6), align 2
+  %ld7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 7), align 1
   %cvt0 = uitofp i8 %ld0 to double
   %cvt1 = uitofp i8 %ld1 to double
   %cvt2 = uitofp i8 %ld2 to double
@@ -456,14 +456,14 @@ define void @uitofp_8i8_8f64() #0 {
   %cvt5 = uitofp i8 %ld5 to double
   %cvt6 = uitofp i8 %ld6 to double
   %cvt7 = uitofp i8 %ld7 to double
-  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-  store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
-  store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
-  store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
-  store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+  store double %cvt0, ptr @dst64, align 64
+  store double %cvt1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  store double %cvt2, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 2), align 16
+  store double %cvt3, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 3), align 8
+  store double %cvt4, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 4), align 32
+  store double %cvt5, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 5), align 8
+  store double %cvt6, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 6), align 16
+  store double %cvt7, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 7), align 8
   ret void
 }
 
@@ -473,99 +473,99 @@ define void @uitofp_8i8_8f64() #0 {
 
 define void @uitofp_2i64_2f32() #0 {
 ; SSE-LABEL: @uitofp_2i64_2f32(
-; SSE-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; SSE-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
+; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
 ; SSE-NEXT:    [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
 ; SSE-NEXT:    [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
-; SSE-NEXT:    store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-; SSE-NEXT:    store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; SSE-NEXT:    store float [[CVT0]], ptr @dst32, align 64
+; SSE-NEXT:    store float [[CVT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
 ; SSE-NEXT:    ret void
 ;
 ; AVX1-LABEL: @uitofp_2i64_2f32(
-; AVX1-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; AVX1-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+; AVX1-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
+; AVX1-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
 ; AVX1-NEXT:    [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
 ; AVX1-NEXT:    [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
-; AVX1-NEXT:    store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-; AVX1-NEXT:    store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; AVX1-NEXT:    store float [[CVT0]], ptr @dst32, align 64
+; AVX1-NEXT:    store float [[CVT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
 ; AVX1-NEXT:    ret void
 ;
 ; AVX2-LABEL: @uitofp_2i64_2f32(
-; AVX2-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; AVX2-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+; AVX2-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 64
+; AVX2-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
 ; AVX2-NEXT:    [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
 ; AVX2-NEXT:    [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
-; AVX2-NEXT:    store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-; AVX2-NEXT:    store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; AVX2-NEXT:    store float [[CVT0]], ptr @dst32, align 64
+; AVX2-NEXT:    store float [[CVT1]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @uitofp_2i64_2f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x float>
-; AVX512-NEXT:    store <2 x float> [[TMP2]], <2 x float>* bitcast ([16 x float]* @dst32 to <2 x float>*), align 64
+; AVX512-NEXT:    store <2 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX512-NEXT:    ret void
 ;
 ; AVX256DQ-LABEL: @uitofp_2i64_2f32(
-; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
+; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 64
 ; AVX256DQ-NEXT:    [[TMP2:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x float>
-; AVX256DQ-NEXT:    store <2 x float> [[TMP2]], <2 x float>* bitcast ([16 x float]* @dst32 to <2 x float>*), align 64
+; AVX256DQ-NEXT:    store <2 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX256DQ-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+  %ld0 = load i64, ptr @src64, align 64
+  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
   %cvt0 = uitofp i64 %ld0 to float
   %cvt1 = uitofp i64 %ld1 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
   ret void
 }
 
 define void @uitofp_4i64_4f32() #0 {
 ; CHECK-LABEL: @uitofp_4i64_4f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
 ; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x float>
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-  %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-  %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
+  %ld0 = load i64, ptr @src64, align 64
+  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
+  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
+  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
   %cvt0 = uitofp i64 %ld0 to float
   %cvt1 = uitofp i64 %ld1 to float
   %cvt2 = uitofp i64 %ld2 to float
   %cvt3 = uitofp i64 %ld3 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
+  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @uitofp_8i64_8f32() #0 {
 ; SSE-LABEL: @uitofp_8i64_8f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4) to <4 x i64>*), align 32
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
 ; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i64> [[TMP3]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @uitofp_8i64_8f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @src64, align 64
 ; AVX-NEXT:    [[TMP2:%.*]] = uitofp <8 x i64> [[TMP1]] to <8 x float>
-; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
+; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-  %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-  %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
-  %ld4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
-  %ld5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
-  %ld6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
-  %ld7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
+  %ld0 = load i64, ptr @src64, align 64
+  %ld1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 1), align 8
+  %ld2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 2), align 16
+  %ld3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 3), align 8
+  %ld4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 4), align 32
+  %ld5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 5), align 8
+  %ld6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 6), align 16
+  %ld7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @src64, i32 0, i64 7), align 8
   %cvt0 = uitofp i64 %ld0 to float
   %cvt1 = uitofp i64 %ld1 to float
   %cvt2 = uitofp i64 %ld2 to float
@@ -574,63 +574,63 @@ define void @uitofp_8i64_8f32() #0 {
   %cvt5 = uitofp i64 %ld5 to float
   %cvt6 = uitofp i64 %ld6 to float
   %cvt7 = uitofp i64 %ld7 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-  store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
-  store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-  store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
-  store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
+  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
+  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
+  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @uitofp_4i32_4f32() #0 {
 ; CHECK-LABEL: @uitofp_4i32_4f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
 ; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
-  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
-  %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
-  %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
+  %ld0 = load i32, ptr @src32, align 64
+  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
+  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
   %cvt0 = uitofp i32 %ld0 to float
   %cvt1 = uitofp i32 %ld1 to float
   %cvt2 = uitofp i32 %ld2 to float
   %cvt3 = uitofp i32 %ld3 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
+  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @uitofp_8i32_8f32() #0 {
 ; SSE-LABEL: @uitofp_8i32_8f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
 ; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @uitofp_8i32_8f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @src32 to <8 x i32>*), align 64
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 64
 ; AVX-NEXT:    [[TMP2:%.*]] = uitofp <8 x i32> [[TMP1]] to <8 x float>
-; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
+; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
-  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
-  %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
-  %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
-  %ld4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4), align 16
-  %ld5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5), align 4
-  %ld6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6), align 8
-  %ld7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7), align 4
+  %ld0 = load i32, ptr @src32, align 64
+  %ld1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1), align 4
+  %ld2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2), align 8
+  %ld3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3), align 4
+  %ld4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
+  %ld5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 5), align 4
+  %ld6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6), align 8
+  %ld7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 7), align 4
   %cvt0 = uitofp i32 %ld0 to float
   %cvt1 = uitofp i32 %ld1 to float
   %cvt2 = uitofp i32 %ld2 to float
@@ -639,64 +639,64 @@ define void @uitofp_8i32_8f32() #0 {
   %cvt5 = uitofp i32 %ld5 to float
   %cvt6 = uitofp i32 %ld6 to float
   %cvt7 = uitofp i32 %ld7 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-  store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
-  store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-  store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
-  store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
+  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
+  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
+  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @uitofp_16i32_16f32() #0 {
 ; SSE-LABEL: @uitofp_16i32_16f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4), align 16
 ; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 8) to <4 x i32>*), align 32
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 8), align 32
 ; SSE-NEXT:    [[TMP6:%.*]] = uitofp <4 x i32> [[TMP5]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 32
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 12) to <4 x i32>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 12), align 16
 ; SSE-NEXT:    [[TMP8:%.*]] = uitofp <4 x i32> [[TMP7]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @uitofp_16i32_16f32(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @src32 to <8 x i32>*), align 64
+; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 64
 ; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <8 x i32> [[TMP1]] to <8 x float>
-; AVX256-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
-; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 8) to <8 x i32>*), align 32
+; AVX256-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
+; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 8), align 32
 ; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <8 x i32> [[TMP3]] to <8 x float>
-; AVX256-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 32
+; AVX256-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @uitofp_16i32_16f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @src32 to <16 x i32>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @src32, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <16 x i32> [[TMP1]] to <16 x float>
-; AVX512-NEXT:    store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 64
+; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX512-NEXT:    ret void
 ;
-  %ld0  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0 ), align 64
-  %ld1  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1 ), align 4
-  %ld2  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2 ), align 8
-  %ld3  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3 ), align 4
-  %ld4  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4 ), align 16
-  %ld5  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5 ), align 4
-  %ld6  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6 ), align 8
-  %ld7  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7 ), align 4
-  %ld8  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 8 ), align 32
-  %ld9  = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 9 ), align 4
-  %ld10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 10), align 8
-  %ld11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 11), align 4
-  %ld12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 12), align 16
-  %ld13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 13), align 4
-  %ld14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 14), align 8
-  %ld15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 15), align 4
+  %ld0  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 0 ), align 64
+  %ld1  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 1 ), align 4
+  %ld2  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 2 ), align 8
+  %ld3  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 3 ), align 4
+  %ld4  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 4 ), align 16
+  %ld5  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 5 ), align 4
+  %ld6  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 6 ), align 8
+  %ld7  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 7 ), align 4
+  %ld8  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 8 ), align 32
+  %ld9  = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 9 ), align 4
+  %ld10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 10), align 8
+  %ld11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 11), align 4
+  %ld12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 12), align 16
+  %ld13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 13), align 4
+  %ld14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 14), align 8
+  %ld15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @src32, i32 0, i64 15), align 4
   %cvt0  = uitofp i32 %ld0  to float
   %cvt1  = uitofp i32 %ld1  to float
   %cvt2  = uitofp i32 %ld2  to float
@@ -713,71 +713,71 @@ define void @uitofp_16i32_16f32() #0 {
   %cvt13 = uitofp i32 %ld13 to float
   %cvt14 = uitofp i32 %ld14 to float
   %cvt15 = uitofp i32 %ld15 to float
-  store float %cvt0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 64
-  store float %cvt1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
-  store float %cvt2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 8
-  store float %cvt3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
-  store float %cvt4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 16
-  store float %cvt5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
-  store float %cvt6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 8
-  store float %cvt7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
-  store float %cvt8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 32
-  store float %cvt9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
-  store float %cvt10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
-  store float %cvt11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-  store float %cvt12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
-  store float %cvt13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-  store float %cvt14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
-  store float %cvt15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+  store float %cvt0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 64
+  store float %cvt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
+  store float %cvt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 8
+  store float %cvt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
+  store float %cvt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 16
+  store float %cvt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
+  store float %cvt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 8
+  store float %cvt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
+  store float %cvt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 32
+  store float %cvt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
+  store float %cvt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 8
+  store float %cvt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+  store float %cvt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
+  store float %cvt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+  store float %cvt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 8
+  store float %cvt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @uitofp_4i16_4f32() #0 {
 ; CHECK-LABEL: @uitofp_4i16_4f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
 ; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x float>
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
-  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
-  %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
-  %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
+  %ld0 = load i16, ptr @src16, align 64
+  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
+  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
+  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
   %cvt0 = uitofp i16 %ld0 to float
   %cvt1 = uitofp i16 %ld1 to float
   %cvt2 = uitofp i16 %ld2 to float
   %cvt3 = uitofp i16 %ld3 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
+  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @uitofp_8i16_8f32() #0 {
 ; SSE-LABEL: @uitofp_8i16_8f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
 ; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i16> [[TMP3]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @uitofp_8i16_8f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @src16 to <8 x i16>*), align 64
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 64
 ; AVX-NEXT:    [[TMP2:%.*]] = uitofp <8 x i16> [[TMP1]] to <8 x float>
-; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
+; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
-  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
-  %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
-  %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
-  %ld4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
-  %ld5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
-  %ld6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
-  %ld7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
+  %ld0 = load i16, ptr @src16, align 64
+  %ld1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1), align 2
+  %ld2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2), align 4
+  %ld3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3), align 2
+  %ld4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
+  %ld5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 5), align 2
+  %ld6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6), align 4
+  %ld7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 7), align 2
   %cvt0 = uitofp i16 %ld0 to float
   %cvt1 = uitofp i16 %ld1 to float
   %cvt2 = uitofp i16 %ld2 to float
@@ -786,64 +786,64 @@ define void @uitofp_8i16_8f32() #0 {
   %cvt5 = uitofp i16 %ld5 to float
   %cvt6 = uitofp i16 %ld6 to float
   %cvt7 = uitofp i16 %ld7 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-  store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
-  store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-  store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
-  store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
+  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
+  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
+  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @uitofp_16i16_16f32() #0 {
 ; SSE-LABEL: @uitofp_16i16_16f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr @src16, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4), align 8
 ; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i16> [[TMP3]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8) to <4 x i16>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 8), align 16
 ; SSE-NEXT:    [[TMP6:%.*]] = uitofp <4 x i16> [[TMP5]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 32
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12) to <4 x i16>*), align 8
+; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 12), align 8
 ; SSE-NEXT:    [[TMP8:%.*]] = uitofp <4 x i16> [[TMP7]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @uitofp_16i16_16f32(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @src16 to <8 x i16>*), align 64
+; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 64
 ; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <8 x i16> [[TMP1]] to <8 x float>
-; AVX256-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
-; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8) to <8 x i16>*), align 16
+; AVX256-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
+; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 8), align 16
 ; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <8 x i16> [[TMP3]] to <8 x float>
-; AVX256-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 32
+; AVX256-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @uitofp_16i16_16f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @src16 to <16 x i16>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <16 x i16> [[TMP1]] to <16 x float>
-; AVX512-NEXT:    store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 64
+; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX512-NEXT:    ret void
 ;
-  %ld0  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0 ), align 64
-  %ld1  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1 ), align 2
-  %ld2  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2 ), align 4
-  %ld3  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3 ), align 2
-  %ld4  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4 ), align 8
-  %ld5  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5 ), align 2
-  %ld6  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6 ), align 4
-  %ld7  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7 ), align 2
-  %ld8  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8 ), align 16
-  %ld9  = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 9 ), align 2
-  %ld10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 10), align 4
-  %ld11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 11), align 2
-  %ld12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12), align 8
-  %ld13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 13), align 2
-  %ld14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 14), align 4
-  %ld15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 15), align 2
+  %ld0  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 0 ), align 64
+  %ld1  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 1 ), align 2
+  %ld2  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 2 ), align 4
+  %ld3  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 3 ), align 2
+  %ld4  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 4 ), align 8
+  %ld5  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 5 ), align 2
+  %ld6  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 6 ), align 4
+  %ld7  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 7 ), align 2
+  %ld8  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 8 ), align 16
+  %ld9  = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 9 ), align 2
+  %ld10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 10), align 4
+  %ld11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 11), align 2
+  %ld12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 12), align 8
+  %ld13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 13), align 2
+  %ld14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 14), align 4
+  %ld15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @src16, i32 0, i64 15), align 2
   %cvt0  = uitofp i16 %ld0  to float
   %cvt1  = uitofp i16 %ld1  to float
   %cvt2  = uitofp i16 %ld2  to float
@@ -860,71 +860,71 @@ define void @uitofp_16i16_16f32() #0 {
   %cvt13 = uitofp i16 %ld13 to float
   %cvt14 = uitofp i16 %ld14 to float
   %cvt15 = uitofp i16 %ld15 to float
-  store float %cvt0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 64
-  store float %cvt1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
-  store float %cvt2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 8
-  store float %cvt3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
-  store float %cvt4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 16
-  store float %cvt5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
-  store float %cvt6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 8
-  store float %cvt7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
-  store float %cvt8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 32
-  store float %cvt9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
-  store float %cvt10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
-  store float %cvt11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-  store float %cvt12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
-  store float %cvt13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-  store float %cvt14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
-  store float %cvt15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+  store float %cvt0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 64
+  store float %cvt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
+  store float %cvt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 8
+  store float %cvt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
+  store float %cvt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 16
+  store float %cvt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
+  store float %cvt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 8
+  store float %cvt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
+  store float %cvt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 32
+  store float %cvt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
+  store float %cvt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 8
+  store float %cvt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+  store float %cvt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
+  store float %cvt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+  store float %cvt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 8
+  store float %cvt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
   ret void
 }
 
 define void @uitofp_4i8_4f32() #0 {
 ; CHECK-LABEL: @uitofp_4i8_4f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
 ; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x float>
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
 ; CHECK-NEXT:    ret void
 ;
-  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
-  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
-  %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
-  %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
+  %ld0 = load i8, ptr @src8, align 64
+  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
+  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
+  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
   %cvt0 = uitofp i8 %ld0 to float
   %cvt1 = uitofp i8 %ld1 to float
   %cvt2 = uitofp i8 %ld2 to float
   %cvt3 = uitofp i8 %ld3 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
+  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
   ret void
 }
 
 define void @uitofp_8i8_8f32() #0 {
 ; SSE-LABEL: @uitofp_8i8_8f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i8> [[TMP3]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @uitofp_8i8_8f32(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([64 x i8]* @src8 to <8 x i8>*), align 64
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @src8, align 64
 ; AVX-NEXT:    [[TMP2:%.*]] = uitofp <8 x i8> [[TMP1]] to <8 x float>
-; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
+; AVX-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX-NEXT:    ret void
 ;
-  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
-  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
-  %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
-  %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
-  %ld4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4), align 4
-  %ld5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5), align 1
-  %ld6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6), align 2
-  %ld7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7), align 1
+  %ld0 = load i8, ptr @src8, align 64
+  %ld1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1), align 1
+  %ld2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2), align 2
+  %ld3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3), align 1
+  %ld4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
+  %ld5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 5), align 1
+  %ld6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6), align 2
+  %ld7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 7), align 1
   %cvt0 = uitofp i8 %ld0 to float
   %cvt1 = uitofp i8 %ld1 to float
   %cvt2 = uitofp i8 %ld2 to float
@@ -933,64 +933,64 @@ define void @uitofp_8i8_8f32() #0 {
   %cvt5 = uitofp i8 %ld5 to float
   %cvt6 = uitofp i8 %ld6 to float
   %cvt7 = uitofp i8 %ld7 to float
-  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-  store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
-  store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
-  store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
-  store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  store float %cvt0, ptr @dst32, align 64
+  store float %cvt1, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1), align 4
+  store float %cvt2, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2), align 8
+  store float %cvt3, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3), align 4
+  store float %cvt4, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
+  store float %cvt5, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5), align 4
+  store float %cvt6, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6), align 8
+  store float %cvt7, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7), align 4
   ret void
 }
 
 define void @uitofp_16i8_16f32() #0 {
 ; SSE-LABEL: @uitofp_16i8_16f32(
-; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
+; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @src8, align 64
 ; SSE-NEXT:    [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
-; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP2]], ptr @dst32, align 64
+; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i8> [[TMP3]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
-; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 8) to <4 x i8>*), align 8
+; SSE-NEXT:    store <4 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4), align 16
+; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 8), align 8
 ; SSE-NEXT:    [[TMP6:%.*]] = uitofp <4 x i8> [[TMP5]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 32
-; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 12) to <4 x i8>*), align 4
+; SSE-NEXT:    store <4 x float> [[TMP6]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
+; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 12), align 4
 ; SSE-NEXT:    [[TMP8:%.*]] = uitofp <4 x i8> [[TMP7]] to <4 x float>
-; SSE-NEXT:    store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 16
+; SSE-NEXT:    store <4 x float> [[TMP8]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
 ; SSE-NEXT:    ret void
 ;
 ; AVX256-LABEL: @uitofp_16i8_16f32(
-; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([64 x i8]* @src8 to <8 x i8>*), align 64
+; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @src8, align 64
 ; AVX256-NEXT:    [[TMP2:%.*]] = uitofp <8 x i8> [[TMP1]] to <8 x float>
-; AVX256-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
-; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 8) to <8 x i8>*), align 8
+; AVX256-NEXT:    store <8 x float> [[TMP2]], ptr @dst32, align 64
+; AVX256-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 8), align 8
 ; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <8 x i8> [[TMP3]] to <8 x float>
-; AVX256-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 32
+; AVX256-NEXT:    store <8 x float> [[TMP4]], ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8), align 32
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @uitofp_16i8_16f32(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @src8 to <16 x i8>*), align 64
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 64
 ; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <16 x i8> [[TMP1]] to <16 x float>
-; AVX512-NEXT:    store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 64
+; AVX512-NEXT:    store <16 x float> [[TMP2]], ptr @dst32, align 64
 ; AVX512-NEXT:    ret void
 ;
-  %ld0  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0 ), align 64
-  %ld1  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1 ), align 1
-  %ld2  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2 ), align 2
-  %ld3  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3 ), align 1
-  %ld4  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4 ), align 4
-  %ld5  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5 ), align 1
-  %ld6  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6 ), align 2
-  %ld7  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7 ), align 1
-  %ld8  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 8 ), align 8
-  %ld9  = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 9 ), align 1
-  %ld10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 10), align 2
-  %ld11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 11), align 1
-  %ld12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 12), align 4
-  %ld13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 13), align 1
-  %ld14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 14), align 2
-  %ld15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 15), align 1
+  %ld0  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 0 ), align 64
+  %ld1  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 1 ), align 1
+  %ld2  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 2 ), align 2
+  %ld3  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 3 ), align 1
+  %ld4  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 4 ), align 4
+  %ld5  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 5 ), align 1
+  %ld6  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 6 ), align 2
+  %ld7  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 7 ), align 1
+  %ld8  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 8 ), align 8
+  %ld9  = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 9 ), align 1
+  %ld10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 10), align 2
+  %ld11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 11), align 1
+  %ld12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 12), align 4
+  %ld13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 13), align 1
+  %ld14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 14), align 2
+  %ld15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @src8, i32 0, i64 15), align 1
   %cvt0  = uitofp i8 %ld0  to float
   %cvt1  = uitofp i8 %ld1  to float
   %cvt2  = uitofp i8 %ld2  to float
@@ -1007,22 +1007,22 @@ define void @uitofp_16i8_16f32() #0 {
   %cvt13 = uitofp i8 %ld13 to float
   %cvt14 = uitofp i8 %ld14 to float
   %cvt15 = uitofp i8 %ld15 to float
-  store float %cvt0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 64
-  store float %cvt1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
-  store float %cvt2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 8
-  store float %cvt3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
-  store float %cvt4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 16
-  store float %cvt5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
-  store float %cvt6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 8
-  store float %cvt7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
-  store float %cvt8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 32
-  store float %cvt9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
-  store float %cvt10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
-  store float %cvt11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
-  store float %cvt12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
-  store float %cvt13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
-  store float %cvt14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
-  store float %cvt15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+  store float %cvt0 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 0 ), align 64
+  store float %cvt1 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 1 ), align 4
+  store float %cvt2 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 2 ), align 8
+  store float %cvt3 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 3 ), align 4
+  store float %cvt4 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 4 ), align 16
+  store float %cvt5 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 5 ), align 4
+  store float %cvt6 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 6 ), align 8
+  store float %cvt7 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 7 ), align 4
+  store float %cvt8 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 8 ), align 32
+  store float %cvt9 , ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 9 ), align 4
+  store float %cvt10, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 10), align 8
+  store float %cvt11, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 11), align 4
+  store float %cvt12, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 12), align 16
+  store float %cvt13, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 13), align 4
+  store float %cvt14, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 14), align 8
+  store float %cvt15, ptr getelementptr inbounds ([16 x float], ptr @dst32, i32 0, i64 15), align 4
   ret void
 }
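As an illustration of the mechanical rewrite in the tests above: with opaque
pointers the constant bitcast that used to retype a global for a vector
access becomes a no-op, so it folds away entirely. A minimal before/after
sketch, using a hypothetical global @g in place of the test arrays:

  ; typed pointers: a constant bitcast retypes [16 x float]* for the vector load
  %v = load <4 x float>, <4 x float>* bitcast ([16 x float]* @g to <4 x float>*), align 16
  ; opaque pointers: ptr carries no pointee type, so the load is direct
  %v = load <4 x float>, ptr @g, align 16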
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll b/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll
index a280781e22712..16f0209ac7f0d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll
@@ -6,9 +6,8 @@
 define void @_Z2azv() local_unnamed_addr {
 ; CHECK-LABEL: @_Z2azv(
 ; CHECK-NEXT:  for.body.lr.ph:
-; CHECK-NEXT:    [[DOTSROA_CAST_4:%.*]] = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 4, i32 0
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[DOTSROA_CAST_4]] to <8 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[DOTSROA_CAST_4:%.*]] = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr undef, i64 4, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr [[DOTSROA_CAST_4]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP1]])
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = icmp sgt i32 [[TMP2]], undef
 ; CHECK-NEXT:    [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP2]], i32 undef
@@ -19,34 +18,34 @@ define void @_Z2azv() local_unnamed_addr {
 ; CHECK-NEXT:    ret void
 ;
 for.body.lr.ph:
-  %.sroa_cast.4 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 4, i32 0
-  %retval.sroa.0.0.copyload.i5.4 = load i32, i32* %.sroa_cast.4, align 4
-  %.sroa_raw_idx.4 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 4, i32 1
-  %retval.sroa.0.0.copyload.i7.4 = load i32, i32* %.sroa_raw_idx.4, align 4
+  %.sroa_cast.4 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr undef, i64 4, i32 0
+  %retval.sroa.0.0.copyload.i5.4 = load i32, ptr %.sroa_cast.4, align 4
+  %.sroa_raw_idx.4 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr undef, i64 4, i32 1
+  %retval.sroa.0.0.copyload.i7.4 = load i32, ptr %.sroa_raw_idx.4, align 4
   %cmp.i2.4 = icmp slt i32 %retval.sroa.0.0.copyload.i5.4, %retval.sroa.0.0.copyload.i7.4
   %0 = select i1 %cmp.i2.4, i32 %retval.sroa.0.0.copyload.i7.4, i32 %retval.sroa.0.0.copyload.i5.4
   %cmp.i1.4 = icmp slt i32 undef, %0
   %.sroa.speculated.4 = select i1 %cmp.i1.4, i32 %0, i32 undef
-  %.sroa_cast.5 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 5, i32 0
-  %retval.sroa.0.0.copyload.i5.5 = load i32, i32* %.sroa_cast.5, align 4
-  %.sroa_raw_idx.5 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 5, i32 1
-  %retval.sroa.0.0.copyload.i7.5 = load i32, i32* %.sroa_raw_idx.5, align 4
+  %.sroa_cast.5 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr undef, i64 5, i32 0
+  %retval.sroa.0.0.copyload.i5.5 = load i32, ptr %.sroa_cast.5, align 4
+  %.sroa_raw_idx.5 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr undef, i64 5, i32 1
+  %retval.sroa.0.0.copyload.i7.5 = load i32, ptr %.sroa_raw_idx.5, align 4
   %cmp.i2.5 = icmp slt i32 %retval.sroa.0.0.copyload.i5.5, %retval.sroa.0.0.copyload.i7.5
   %1 = select i1 %cmp.i2.5, i32 %retval.sroa.0.0.copyload.i7.5, i32 %retval.sroa.0.0.copyload.i5.5
   %cmp.i1.5 = icmp slt i32 %.sroa.speculated.4, %1
   %.sroa.speculated.5 = select i1 %cmp.i1.5, i32 %1, i32 %.sroa.speculated.4
-  %.sroa_cast.6 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 6, i32 0
-  %retval.sroa.0.0.copyload.i5.6 = load i32, i32* %.sroa_cast.6, align 4
-  %.sroa_raw_idx.6 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 6, i32 1
-  %retval.sroa.0.0.copyload.i7.6 = load i32, i32* %.sroa_raw_idx.6, align 4
+  %.sroa_cast.6 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr undef, i64 6, i32 0
+  %retval.sroa.0.0.copyload.i5.6 = load i32, ptr %.sroa_cast.6, align 4
+  %.sroa_raw_idx.6 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr undef, i64 6, i32 1
+  %retval.sroa.0.0.copyload.i7.6 = load i32, ptr %.sroa_raw_idx.6, align 4
   %cmp.i2.6 = icmp slt i32 %retval.sroa.0.0.copyload.i5.6, %retval.sroa.0.0.copyload.i7.6
   %2 = select i1 %cmp.i2.6, i32 %retval.sroa.0.0.copyload.i7.6, i32 %retval.sroa.0.0.copyload.i5.6
   %cmp.i1.6 = icmp slt i32 %.sroa.speculated.5, %2
   %.sroa.speculated.6 = select i1 %cmp.i1.6, i32 %2, i32 %.sroa.speculated.5
-  %.sroa_cast.7 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 7, i32 0
-  %retval.sroa.0.0.copyload.i5.7 = load i32, i32* %.sroa_cast.7, align 4
-  %.sroa_raw_idx.7 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 7, i32 1
-  %retval.sroa.0.0.copyload.i7.7 = load i32, i32* %.sroa_raw_idx.7, align 4
+  %.sroa_cast.7 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr undef, i64 7, i32 0
+  %retval.sroa.0.0.copyload.i5.7 = load i32, ptr %.sroa_cast.7, align 4
+  %.sroa_raw_idx.7 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr undef, i64 7, i32 1
+  %retval.sroa.0.0.copyload.i7.7 = load i32, ptr %.sroa_raw_idx.7, align 4
   %cmp.i2.7 = icmp slt i32 %retval.sroa.0.0.copyload.i5.7, %retval.sroa.0.0.copyload.i7.7
   %3 = select i1 %cmp.i2.7, i32 %retval.sroa.0.0.copyload.i7.7, i32 %retval.sroa.0.0.copyload.i5.7
   %cmp.i1.7 = icmp slt i32 %.sroa.speculated.6, %3

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll b/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll
index 454e01c81c89f..71b957ac7bbd8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll
@@ -7,7 +7,7 @@
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
 
-define void @foo(i32* nocapture %x) #0 {
+define void @foo(ptr nocapture %x) #0 {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[BB2:%.*]]
@@ -16,24 +16,23 @@ define void @foo(i32* nocapture %x) #0 {
 ; CHECK-NEXT:    br label [[BB2]]
 ; CHECK:       bb2:
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi <4 x i32> [ poison, [[BB1:%.*]] ], [ <i32 2, i32 2, i32 2, i32 2>, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[X:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP0]], <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP0]], ptr [[X:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
   br label %bb2
 
 bb1:                                    ; an unreachable block
-  %t3 = getelementptr inbounds i32, i32* %x, i64 4
-  %t4 = load i32, i32* %t3, align 4
-  %t5 = getelementptr inbounds i32, i32* %x, i64 5
-  %t6 = load i32, i32* %t5, align 4
+  %t3 = getelementptr inbounds i32, ptr %x, i64 4
+  %t4 = load i32, ptr %t3, align 4
+  %t5 = getelementptr inbounds i32, ptr %x, i64 5
+  %t6 = load i32, ptr %t5, align 4
   %bad = fadd float %bad, 0.000000e+00  ; <- an instruction with self dependency,
   ;    but legal in unreachable code
-  %t7 = getelementptr inbounds i32, i32* %x, i64 6
-  %t8 = load i32, i32* %t7, align 4
-  %t9 = getelementptr inbounds i32, i32* %x, i64 7
-  %t10 = load i32, i32* %t9, align 4
+  %t7 = getelementptr inbounds i32, ptr %x, i64 6
+  %t8 = load i32, ptr %t7, align 4
+  %t9 = getelementptr inbounds i32, ptr %x, i64 7
+  %t10 = load i32, ptr %t9, align 4
   br label %bb2
 
 bb2:
@@ -41,39 +40,39 @@ bb2:
   %t2.0 = phi i32 [ %t6, %bb1 ], [ 2, %entry ]
   %t3.0 = phi i32 [ %t8, %bb1 ], [ 2, %entry ]
   %t4.0 = phi i32 [ %t10, %bb1 ], [ 2, %entry ]
-  store i32 %t1.0, i32* %x, align 4
-  %t12 = getelementptr inbounds i32, i32* %x, i64 1
-  store i32 %t2.0, i32* %t12, align 4
-  %t13 = getelementptr inbounds i32, i32* %x, i64 2
-  store i32 %t3.0, i32* %t13, align 4
-  %t14 = getelementptr inbounds i32, i32* %x, i64 3
-  store i32 %t4.0, i32* %t14, align 4
+  store i32 %t1.0, ptr %x, align 4
+  %t12 = getelementptr inbounds i32, ptr %x, i64 1
+  store i32 %t2.0, ptr %t12, align 4
+  %t13 = getelementptr inbounds i32, ptr %x, i64 2
+  store i32 %t3.0, ptr %t13, align 4
+  %t14 = getelementptr inbounds i32, ptr %x, i64 3
+  store i32 %t4.0, ptr %t14, align 4
   ret void
 }
 
 define void @bar() {
 ; CHECK-LABEL: @bar(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP:%.*]] = load atomic i8*, i8** undef unordered, align 8
+; CHECK-NEXT:    [[TMP:%.*]] = load atomic ptr, ptr undef unordered, align 8
 ; CHECK-NEXT:    br label [[BB6:%.*]]
 ; CHECK:       bb5:
-; CHECK-NEXT:    [[TMP4:%.*]] = load atomic i8*, i8** undef unordered, align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = load atomic ptr, ptr undef unordered, align 8
 ; CHECK-NEXT:    br label [[BB6]]
 ; CHECK:       bb6:
-; CHECK-NEXT:    [[TMP7:%.*]] = phi i8* [ [[TMP]], [[BB5:%.*]] ], [ undef, [[BB:%.*]] ]
-; CHECK-NEXT:    [[TMP8:%.*]] = phi i8* [ [[TMP4]], [[BB5]] ], [ undef, [[BB]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = phi ptr [ [[TMP]], [[BB5:%.*]] ], [ undef, [[BB:%.*]] ]
+; CHECK-NEXT:    [[TMP8:%.*]] = phi ptr [ [[TMP4]], [[BB5]] ], [ undef, [[BB]] ]
 ; CHECK-NEXT:    ret void
 ;
 bb:
-  %tmp = load atomic i8*, i8** undef unordered, align 8
+  %tmp = load atomic ptr, ptr undef unordered, align 8
   br label %bb6
 
 bb5:                                              ; No predecessors!
-  %tmp4 = load atomic i8*, i8** undef unordered, align 8
+  %tmp4 = load atomic ptr, ptr undef unordered, align 8
   br label %bb6
 
 bb6:                                              ; preds = %bb5, %bb
-  %tmp7 = phi i8* [ %tmp, %bb5 ], [ undef, %bb ]
-  %tmp8 = phi i8* [ %tmp4, %bb5 ], [ undef, %bb ]
+  %tmp7 = phi ptr [ %tmp, %bb5 ], [ undef, %bb ]
+  %tmp8 = phi ptr [ %tmp4, %bb5 ], [ undef, %bb ]
   ret void
 }
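The same collapse applies one level up: a pointer loaded through a pointer,
formerly i8* via i8**, is now just ptr via ptr, as in the atomic loads in
@bar above. A minimal sketch with a hypothetical operand %p:

  ; typed pointers: the value type and the address type differ by one level
  %tmp = load atomic i8*, i8** %p unordered, align 8
  ; opaque pointers: both the loaded value and its address print as ptr
  %tmp = load atomic ptr, ptr %p unordered, align 8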

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll
index 5b3c50e96d5d5..3ce914d336575 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll
@@ -8,20 +8,20 @@
 define void @n() local_unnamed_addr #0 {
 ; CHECK-LABEL: @n(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x [4 x i32]]* @k to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 1, i64 0), align 16
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 1, i64 1), align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 1, i64 2), align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 1, i64 3), align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 2, i64 0), align 16
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 2, i64 1), align 4
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 2, i64 2), align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 2, i64 3), align 4
-; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 0), align 16
-; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 1), align 4
-; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 2), align 8
-; CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 3), align 4
-; CHECK-NEXT:    [[TMP13:%.*]] = load <16 x i32>, <16 x i32>* bitcast (i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 0) to <16 x i32>*), align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr @k, align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 1, i64 0), align 16
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 1, i64 1), align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 1, i64 2), align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 1, i64 3), align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 2, i64 0), align 16
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 2, i64 1), align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 2, i64 2), align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 2, i64 3), align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 3, i64 0), align 16
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 3, i64 1), align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 3, i64 2), align 8
+; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 3, i64 3), align 4
+; CHECK-NEXT:    [[TMP13:%.*]] = load <16 x i32>, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 4, i64 0), align 16
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND]] ], [ 0, [[ENTRY:%.*]] ]
@@ -237,44 +237,44 @@ define void @n() local_unnamed_addr #0 {
 ; CHECK-NEXT:    [[TMP114:%.*]] = or i1 [[CMP12_3_7]], [[TMP110]]
 ; CHECK-NEXT:    [[SPEC_SELECT_3_7:%.*]] = select i1 [[TMP114]], i32 7, i32 [[SPEC_SELECT_3_6]]
 ; CHECK-NEXT:    [[SPEC_SELECT8_3_7]] = select i1 [[CMP12_3_7]], i32 [[TMP113]], i32 [[SPEC_SELECT8_2_7]]
-; CHECK-NEXT:    [[K:%.*]] = getelementptr inbounds [366 x i32], [366 x i32]* @l, i64 0, i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store i32 [[SPEC_SELECT_3_7]], i32* [[K]], align 4
+; CHECK-NEXT:    [[K:%.*]] = getelementptr inbounds [366 x i32], ptr @l, i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i32 [[SPEC_SELECT_3_7]], ptr [[K]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    br label [[FOR_COND]]
 ;
 entry:
-  %0 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 0, i64 0), align 16
-  %1 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 0, i64 1), align 4
-  %2 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 0, i64 2), align 8
-  %3 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 0, i64 3), align 4
-  %4 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 1, i64 0), align 16
-  %5 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 1, i64 1), align 4
-  %6 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 1, i64 2), align 8
-  %7 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 1, i64 3), align 4
-  %8 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 2, i64 0), align 16
-  %9 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 2, i64 1), align 4
-  %10 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 2, i64 2), align 8
-  %11 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 2, i64 3), align 4
-  %12 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 0), align 16
-  %13 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 1), align 4
-  %14 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 2), align 8
-  %15 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 3), align 4
-  %16 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 0), align 16
-  %17 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 1), align 4
-  %18 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 2), align 8
-  %19 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 3), align 4
-  %20 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 5, i64 0), align 16
-  %21 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 5, i64 1), align 4
-  %22 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 5, i64 2), align 8
-  %23 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 5, i64 3), align 4
-  %24 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 6, i64 0), align 16
-  %25 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 6, i64 1), align 4
-  %26 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 6, i64 2), align 8
-  %27 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 6, i64 3), align 4
-  %28 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 7, i64 0), align 16
-  %29 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 7, i64 1), align 4
-  %30 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 7, i64 2), align 8
-  %31 = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 7, i64 3), align 4
+  %0 = load i32, ptr @k, align 16
+  %1 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 0, i64 1), align 4
+  %2 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 0, i64 2), align 8
+  %3 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 0, i64 3), align 4
+  %4 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 1, i64 0), align 16
+  %5 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 1, i64 1), align 4
+  %6 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 1, i64 2), align 8
+  %7 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 1, i64 3), align 4
+  %8 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 2, i64 0), align 16
+  %9 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 2, i64 1), align 4
+  %10 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 2, i64 2), align 8
+  %11 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 2, i64 3), align 4
+  %12 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 3, i64 0), align 16
+  %13 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 3, i64 1), align 4
+  %14 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 3, i64 2), align 8
+  %15 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 3, i64 3), align 4
+  %16 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 4, i64 0), align 16
+  %17 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 4, i64 1), align 4
+  %18 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 4, i64 2), align 8
+  %19 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 4, i64 3), align 4
+  %20 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 5, i64 0), align 16
+  %21 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 5, i64 1), align 4
+  %22 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 5, i64 2), align 8
+  %23 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 5, i64 3), align 4
+  %24 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 6, i64 0), align 16
+  %25 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 6, i64 1), align 4
+  %26 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 6, i64 2), align 8
+  %27 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 6, i64 3), align 4
+  %28 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 7, i64 0), align 16
+  %29 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 7, i64 1), align 4
+  %30 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 7, i64 2), align 8
+  %31 = load i32, ptr getelementptr inbounds ([8 x [4 x i32]], ptr @k, i64 0, i64 7, i64 3), align 4
   br label %for.cond
 
 for.cond:                                         ; preds = %for.cond, %entry
@@ -502,8 +502,8 @@ for.cond:                                         ; preds = %for.cond, %entry
   %115 = or i1 %cmp12.3.7, %112
   %spec.select.3.7 = select i1 %115, i32 7, i32 %spec.select.3.6
   %spec.select8.3.7 = select i1 %cmp12.3.7, i32 %114, i32 %spec.select8.2.7
-  %k = getelementptr inbounds [366 x i32], [366 x i32]* @l, i64 0, i64 %indvars.iv
-  store i32 %spec.select.3.7, i32* %k, align 4
+  %k = getelementptr inbounds [366 x i32], ptr @l, i64 0, i64 %indvars.iv
+  store i32 %spec.select.3.7, ptr %k, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   br label %for.cond
 }
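Note also why the first load in @n's entry block now reads straight from @k:
a constant getelementptr whose indices are all zero no longer serves to decay
the array type, so it folds to the bare global. A sketch with a hypothetical
array @a of the same shape:

  ; typed pointers: the all-zero GEP decays [8 x [4 x i32]]* to i32*
  %x = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @a, i64 0, i64 0, i64 0), align 16
  ; opaque pointers: an all-zero constant GEP is a no-op, so only @a remains
  %x = load i32, ptr @a, align 16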

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll
index 492b761af7130..0ba168c1bb1ae 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll
@@ -7,23 +7,23 @@
 ; the cost of entire "aggregate build" sequence while
 ; building vectorizable tree from only a portion of it.
 
-define void @test(i32* nocapture %t2) {
+define void @test(ptr nocapture %t2) {
 ; CHECK-LABEL: @test(
-; CHECK-NEXT:    [[T3:%.*]] = load i32, i32* [[T2:%.*]], align 4
-; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T2]], i64 7
-; CHECK-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
-; CHECK-NEXT:    [[T8:%.*]] = getelementptr inbounds i32, i32* [[T2]], i64 1
-; CHECK-NEXT:    [[T9:%.*]] = load i32, i32* [[T8]], align 4
-; CHECK-NEXT:    [[T10:%.*]] = getelementptr inbounds i32, i32* [[T2]], i64 6
-; CHECK-NEXT:    [[T11:%.*]] = load i32, i32* [[T10]], align 4
-; CHECK-NEXT:    [[T14:%.*]] = getelementptr inbounds i32, i32* [[T2]], i64 2
-; CHECK-NEXT:    [[T15:%.*]] = load i32, i32* [[T14]], align 4
-; CHECK-NEXT:    [[T16:%.*]] = getelementptr inbounds i32, i32* [[T2]], i64 5
-; CHECK-NEXT:    [[T17:%.*]] = load i32, i32* [[T16]], align 4
-; CHECK-NEXT:    [[T20:%.*]] = getelementptr inbounds i32, i32* [[T2]], i64 3
-; CHECK-NEXT:    [[T21:%.*]] = load i32, i32* [[T20]], align 4
-; CHECK-NEXT:    [[T22:%.*]] = getelementptr inbounds i32, i32* [[T2]], i64 4
-; CHECK-NEXT:    [[T23:%.*]] = load i32, i32* [[T22]], align 4
+; CHECK-NEXT:    [[T3:%.*]] = load i32, ptr [[T2:%.*]], align 4
+; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 7
+; CHECK-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
+; CHECK-NEXT:    [[T8:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 1
+; CHECK-NEXT:    [[T9:%.*]] = load i32, ptr [[T8]], align 4
+; CHECK-NEXT:    [[T10:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 6
+; CHECK-NEXT:    [[T11:%.*]] = load i32, ptr [[T10]], align 4
+; CHECK-NEXT:    [[T14:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 2
+; CHECK-NEXT:    [[T15:%.*]] = load i32, ptr [[T14]], align 4
+; CHECK-NEXT:    [[T16:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 5
+; CHECK-NEXT:    [[T17:%.*]] = load i32, ptr [[T16]], align 4
+; CHECK-NEXT:    [[T20:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 3
+; CHECK-NEXT:    [[T21:%.*]] = load i32, ptr [[T20]], align 4
+; CHECK-NEXT:    [[T22:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 4
+; CHECK-NEXT:    [[T23:%.*]] = load i32, ptr [[T22]], align 4
 ; CHECK-NEXT:    [[T24:%.*]] = add nsw i32 [[T23]], [[T21]]
 ; CHECK-NEXT:    [[T25:%.*]] = sub nsw i32 [[T21]], [[T23]]
 ; CHECK-NEXT:    [[T27:%.*]] = sub nsw i32 [[T3]], [[T24]]
@@ -52,25 +52,24 @@ define void @test(i32* nocapture %t2) {
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 undef, i32 3>
 ; CHECK-NEXT:    [[T71:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[T34]], i32 6
 ; CHECK-NEXT:    [[T76:%.*]] = shl <8 x i32> [[T71]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; CHECK-NEXT:    [[T79:%.*]] = bitcast i32* [[T2]] to <8 x i32>*
-; CHECK-NEXT:    store <8 x i32> [[T76]], <8 x i32>* [[T79]], align 4
+; CHECK-NEXT:    store <8 x i32> [[T76]], ptr [[T2]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %t3 = load i32, i32* %t2, align 4
-  %t4 = getelementptr inbounds i32, i32* %t2, i64 7
-  %t5 = load i32, i32* %t4, align 4
-  %t8 = getelementptr inbounds i32, i32* %t2, i64 1
-  %t9 = load i32, i32* %t8, align 4
-  %t10 = getelementptr inbounds i32, i32* %t2, i64 6
-  %t11 = load i32, i32* %t10, align 4
-  %t14 = getelementptr inbounds i32, i32* %t2, i64 2
-  %t15 = load i32, i32* %t14, align 4
-  %t16 = getelementptr inbounds i32, i32* %t2, i64 5
-  %t17 = load i32, i32* %t16, align 4
-  %t20 = getelementptr inbounds i32, i32* %t2, i64 3
-  %t21 = load i32, i32* %t20, align 4
-  %t22 = getelementptr inbounds i32, i32* %t2, i64 4
-  %t23 = load i32, i32* %t22, align 4
+  %t3 = load i32, ptr %t2, align 4
+  %t4 = getelementptr inbounds i32, ptr %t2, i64 7
+  %t5 = load i32, ptr %t4, align 4
+  %t8 = getelementptr inbounds i32, ptr %t2, i64 1
+  %t9 = load i32, ptr %t8, align 4
+  %t10 = getelementptr inbounds i32, ptr %t2, i64 6
+  %t11 = load i32, ptr %t10, align 4
+  %t14 = getelementptr inbounds i32, ptr %t2, i64 2
+  %t15 = load i32, ptr %t14, align 4
+  %t16 = getelementptr inbounds i32, ptr %t2, i64 5
+  %t17 = load i32, ptr %t16, align 4
+  %t20 = getelementptr inbounds i32, ptr %t2, i64 3
+  %t21 = load i32, ptr %t20, align 4
+  %t22 = getelementptr inbounds i32, ptr %t2, i64 4
+  %t23 = load i32, ptr %t22, align 4
   %t24 = add nsw i32 %t23, %t21
   %t25 = sub nsw i32 %t21, %t23
   %t27 = sub nsw i32 %t3, %t24
@@ -99,7 +98,6 @@ define void @test(i32* nocapture %t2) {
   %t71 = insertelement <8 x i32> %t70, i32 %t34, i32 6
   %t72 = insertelement <8 x i32> %t71, i32 %t49, i32 7
   %t76 = shl <8 x i32> %t72, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-  %t79 = bitcast i32* %t2 to <8 x i32>*
-  store <8 x i32> %t76, <8 x i32>* %t79, align 4
+  store <8 x i32> %t76, ptr %t2, align 4
   ret void
 }
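Where the bitcast was a standalone instruction rather than a constant
expression, as with %t79 above, the conversion deletes the instruction
outright, which is why these hunks also shrink by a line. A sketch with a
hypothetical pointer %p and vector %v:

  ; typed pointers: an explicit bitcast instruction feeds the vector store
  %q = bitcast i32* %p to <8 x i32>*
  store <8 x i32> %v, <8 x i32>* %q, align 4
  ; opaque pointers: the store goes straight through %p
  store <8 x i32> %v, ptr %p, align 4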

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll
index 02d8eaa085eca..5347ae31ed6cb 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll
@@ -7,23 +7,23 @@
 ; the cost of entire "aggregate build" sequence while
 ; building vectorizable tree from only a portion of it.
 
-define void @test(i32* nocapture %t2) {
+define void @test(ptr nocapture %t2) {
 ; CHECK-LABEL: @test(
-; CHECK-NEXT:    [[T3:%.*]] = load i32, i32* [[T2:%.*]], align 4
-; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T2]], i64 7
-; CHECK-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
-; CHECK-NEXT:    [[T8:%.*]] = getelementptr inbounds i32, i32* [[T2]], i64 1
-; CHECK-NEXT:    [[T9:%.*]] = load i32, i32* [[T8]], align 4
-; CHECK-NEXT:    [[T10:%.*]] = getelementptr inbounds i32, i32* [[T2]], i64 6
-; CHECK-NEXT:    [[T11:%.*]] = load i32, i32* [[T10]], align 4
-; CHECK-NEXT:    [[T14:%.*]] = getelementptr inbounds i32, i32* [[T2]], i64 2
-; CHECK-NEXT:    [[T15:%.*]] = load i32, i32* [[T14]], align 4
-; CHECK-NEXT:    [[T16:%.*]] = getelementptr inbounds i32, i32* [[T2]], i64 5
-; CHECK-NEXT:    [[T17:%.*]] = load i32, i32* [[T16]], align 4
-; CHECK-NEXT:    [[T20:%.*]] = getelementptr inbounds i32, i32* [[T2]], i64 3
-; CHECK-NEXT:    [[T21:%.*]] = load i32, i32* [[T20]], align 4
-; CHECK-NEXT:    [[T22:%.*]] = getelementptr inbounds i32, i32* [[T2]], i64 4
-; CHECK-NEXT:    [[T23:%.*]] = load i32, i32* [[T22]], align 4
+; CHECK-NEXT:    [[T3:%.*]] = load i32, ptr [[T2:%.*]], align 4
+; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 7
+; CHECK-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
+; CHECK-NEXT:    [[T8:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 1
+; CHECK-NEXT:    [[T9:%.*]] = load i32, ptr [[T8]], align 4
+; CHECK-NEXT:    [[T10:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 6
+; CHECK-NEXT:    [[T11:%.*]] = load i32, ptr [[T10]], align 4
+; CHECK-NEXT:    [[T14:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 2
+; CHECK-NEXT:    [[T15:%.*]] = load i32, ptr [[T14]], align 4
+; CHECK-NEXT:    [[T16:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 5
+; CHECK-NEXT:    [[T17:%.*]] = load i32, ptr [[T16]], align 4
+; CHECK-NEXT:    [[T20:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 3
+; CHECK-NEXT:    [[T21:%.*]] = load i32, ptr [[T20]], align 4
+; CHECK-NEXT:    [[T22:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 4
+; CHECK-NEXT:    [[T23:%.*]] = load i32, ptr [[T22]], align 4
 ; CHECK-NEXT:    [[T24:%.*]] = add nsw i32 [[T23]], [[T21]]
 ; CHECK-NEXT:    [[T25:%.*]] = sub nsw i32 [[T21]], [[T23]]
 ; CHECK-NEXT:    [[T27:%.*]] = sub nsw i32 [[T3]], [[T24]]
@@ -52,25 +52,24 @@ define void @test(i32* nocapture %t2) {
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 undef, i32 3>
 ; CHECK-NEXT:    [[T71:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[T34]], i32 6
 ; CHECK-NEXT:    [[T76:%.*]] = shl <8 x i32> [[T71]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; CHECK-NEXT:    [[T79:%.*]] = bitcast i32* [[T2]] to <8 x i32>*
-; CHECK-NEXT:    store <8 x i32> [[T76]], <8 x i32>* [[T79]], align 4
+; CHECK-NEXT:    store <8 x i32> [[T76]], ptr [[T2]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %t3 = load i32, i32* %t2, align 4
-  %t4 = getelementptr inbounds i32, i32* %t2, i64 7
-  %t5 = load i32, i32* %t4, align 4
-  %t8 = getelementptr inbounds i32, i32* %t2, i64 1
-  %t9 = load i32, i32* %t8, align 4
-  %t10 = getelementptr inbounds i32, i32* %t2, i64 6
-  %t11 = load i32, i32* %t10, align 4
-  %t14 = getelementptr inbounds i32, i32* %t2, i64 2
-  %t15 = load i32, i32* %t14, align 4
-  %t16 = getelementptr inbounds i32, i32* %t2, i64 5
-  %t17 = load i32, i32* %t16, align 4
-  %t20 = getelementptr inbounds i32, i32* %t2, i64 3
-  %t21 = load i32, i32* %t20, align 4
-  %t22 = getelementptr inbounds i32, i32* %t2, i64 4
-  %t23 = load i32, i32* %t22, align 4
+  %t3 = load i32, ptr %t2, align 4
+  %t4 = getelementptr inbounds i32, ptr %t2, i64 7
+  %t5 = load i32, ptr %t4, align 4
+  %t8 = getelementptr inbounds i32, ptr %t2, i64 1
+  %t9 = load i32, ptr %t8, align 4
+  %t10 = getelementptr inbounds i32, ptr %t2, i64 6
+  %t11 = load i32, ptr %t10, align 4
+  %t14 = getelementptr inbounds i32, ptr %t2, i64 2
+  %t15 = load i32, ptr %t14, align 4
+  %t16 = getelementptr inbounds i32, ptr %t2, i64 5
+  %t17 = load i32, ptr %t16, align 4
+  %t20 = getelementptr inbounds i32, ptr %t2, i64 3
+  %t21 = load i32, ptr %t20, align 4
+  %t22 = getelementptr inbounds i32, ptr %t2, i64 4
+  %t23 = load i32, ptr %t22, align 4
   %t24 = add nsw i32 %t23, %t21
   %t25 = sub nsw i32 %t21, %t23
   %t27 = sub nsw i32 %t3, %t24
@@ -99,7 +98,6 @@ define void @test(i32* nocapture %t2) {
   %t71 = insertelement <8 x i32> %t70, i32 %t34, i32 6
   %t72 = insertelement <8 x i32> %t71, i32 %t49, i32 7
   %t76 = shl <8 x i32> %t72, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-  %t79 = bitcast i32* %t2 to <8 x i32>*
-  store <8 x i32> %t76, <8 x i32>* %t79, align 4
+  store <8 x i32> %t76, ptr %t2, align 4
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll
index f80d73b8f2388..bea9ac26ba0d2 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll
@@ -1,23 +1,23 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -S | FileCheck %s
 
-define void @test(i32* nocapture %t2) {
+define void @test(ptr nocapture %t2) {
 ; CHECK-LABEL: @test(
-; CHECK-NEXT:    [[T3:%.*]] = load i32, i32* [[T2:%.*]], align 4
-; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, i32* [[T2]], i64 7
-; CHECK-NEXT:    [[T5:%.*]] = load i32, i32* [[T4]], align 4
-; CHECK-NEXT:    [[T8:%.*]] = getelementptr inbounds i32, i32* [[T2]], i64 1
-; CHECK-NEXT:    [[T9:%.*]] = load i32, i32* [[T8]], align 4
-; CHECK-NEXT:    [[T10:%.*]] = getelementptr inbounds i32, i32* [[T2]], i64 6
-; CHECK-NEXT:    [[T11:%.*]] = load i32, i32* [[T10]], align 4
-; CHECK-NEXT:    [[T14:%.*]] = getelementptr inbounds i32, i32* [[T2]], i64 2
-; CHECK-NEXT:    [[T15:%.*]] = load i32, i32* [[T14]], align 4
-; CHECK-NEXT:    [[T16:%.*]] = getelementptr inbounds i32, i32* [[T2]], i64 5
-; CHECK-NEXT:    [[T17:%.*]] = load i32, i32* [[T16]], align 4
-; CHECK-NEXT:    [[T20:%.*]] = getelementptr inbounds i32, i32* [[T2]], i64 3
-; CHECK-NEXT:    [[T21:%.*]] = load i32, i32* [[T20]], align 4
-; CHECK-NEXT:    [[T22:%.*]] = getelementptr inbounds i32, i32* [[T2]], i64 4
-; CHECK-NEXT:    [[T23:%.*]] = load i32, i32* [[T22]], align 4
+; CHECK-NEXT:    [[T3:%.*]] = load i32, ptr [[T2:%.*]], align 4
+; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 7
+; CHECK-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
+; CHECK-NEXT:    [[T8:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 1
+; CHECK-NEXT:    [[T9:%.*]] = load i32, ptr [[T8]], align 4
+; CHECK-NEXT:    [[T10:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 6
+; CHECK-NEXT:    [[T11:%.*]] = load i32, ptr [[T10]], align 4
+; CHECK-NEXT:    [[T14:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 2
+; CHECK-NEXT:    [[T15:%.*]] = load i32, ptr [[T14]], align 4
+; CHECK-NEXT:    [[T16:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 5
+; CHECK-NEXT:    [[T17:%.*]] = load i32, ptr [[T16]], align 4
+; CHECK-NEXT:    [[T20:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 3
+; CHECK-NEXT:    [[T21:%.*]] = load i32, ptr [[T20]], align 4
+; CHECK-NEXT:    [[T22:%.*]] = getelementptr inbounds i32, ptr [[T2]], i64 4
+; CHECK-NEXT:    [[T23:%.*]] = load i32, ptr [[T22]], align 4
 ; CHECK-NEXT:    [[T24:%.*]] = add nsw i32 [[T23]], [[T21]]
 ; CHECK-NEXT:    [[T25:%.*]] = sub nsw i32 [[T21]], [[T23]]
 ; CHECK-NEXT:    [[T27:%.*]] = sub nsw i32 [[T3]], [[T24]]
@@ -48,25 +48,24 @@ define void @test(i32* nocapture %t2) {
 ; CHECK-NEXT:    [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6
 ; CHECK-NEXT:    [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7
 ; CHECK-NEXT:    [[T76:%.*]] = shl <8 x i32> [[T72]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; CHECK-NEXT:    [[T79:%.*]] = bitcast i32* [[T2]] to <8 x i32>*
-; CHECK-NEXT:    store <8 x i32> [[T76]], <8 x i32>* [[T79]], align 4
+; CHECK-NEXT:    store <8 x i32> [[T76]], ptr [[T2]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %t3 = load i32, i32* %t2, align 4
-  %t4 = getelementptr inbounds i32, i32* %t2, i64 7
-  %t5 = load i32, i32* %t4, align 4
-  %t8 = getelementptr inbounds i32, i32* %t2, i64 1
-  %t9 = load i32, i32* %t8, align 4
-  %t10 = getelementptr inbounds i32, i32* %t2, i64 6
-  %t11 = load i32, i32* %t10, align 4
-  %t14 = getelementptr inbounds i32, i32* %t2, i64 2
-  %t15 = load i32, i32* %t14, align 4
-  %t16 = getelementptr inbounds i32, i32* %t2, i64 5
-  %t17 = load i32, i32* %t16, align 4
-  %t20 = getelementptr inbounds i32, i32* %t2, i64 3
-  %t21 = load i32, i32* %t20, align 4
-  %t22 = getelementptr inbounds i32, i32* %t2, i64 4
-  %t23 = load i32, i32* %t22, align 4
+  %t3 = load i32, ptr %t2, align 4
+  %t4 = getelementptr inbounds i32, ptr %t2, i64 7
+  %t5 = load i32, ptr %t4, align 4
+  %t8 = getelementptr inbounds i32, ptr %t2, i64 1
+  %t9 = load i32, ptr %t8, align 4
+  %t10 = getelementptr inbounds i32, ptr %t2, i64 6
+  %t11 = load i32, ptr %t10, align 4
+  %t14 = getelementptr inbounds i32, ptr %t2, i64 2
+  %t15 = load i32, ptr %t14, align 4
+  %t16 = getelementptr inbounds i32, ptr %t2, i64 5
+  %t17 = load i32, ptr %t16, align 4
+  %t20 = getelementptr inbounds i32, ptr %t2, i64 3
+  %t21 = load i32, ptr %t20, align 4
+  %t22 = getelementptr inbounds i32, ptr %t2, i64 4
+  %t23 = load i32, ptr %t22, align 4
   %t24 = add nsw i32 %t23, %t21
   %t25 = sub nsw i32 %t21, %t23
   %t27 = sub nsw i32 %t3, %t24
@@ -95,7 +94,6 @@ define void @test(i32* nocapture %t2) {
   %t71 = insertelement <8 x i32> %t70, i32 %t34, i32 6
   %t72 = insertelement <8 x i32> %t71, i32 %t49, i32 7
   %t76 = shl <8 x i32> %t72, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-  %t79 = bitcast i32* %t2 to <8 x i32>*
-  store <8 x i32> %t76, <8 x i32>* %t79, align 4
+  store <8 x i32> %t76, ptr %t2, align 4
   ret void
 }

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
index c62262732d08d..22cd408cd6dc7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
@@ -1,890 +1,842 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 < %s | FileCheck %s
 
-define void @add0(i32* noalias %dst, i32* noalias %src) {
+define void @add0(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @add0(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[SRC:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], <i32 1, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[DST:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr [[DST:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %incdec.ptr = getelementptr inbounds i32, i32* %src, i64 1
-  %0 = load i32, i32* %src, align 4
+  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
+  %0 = load i32, ptr %src, align 4
   %add = add nsw i32 %0, 1
-  %incdec.ptr1 = getelementptr inbounds i32, i32* %dst, i64 1
-  store i32 %add, i32* %dst, align 4
-  %incdec.ptr2 = getelementptr inbounds i32, i32* %src, i64 2
-  %1 = load i32, i32* %incdec.ptr, align 4
+  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
+  store i32 %add, ptr %dst, align 4
+  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
+  %1 = load i32, ptr %incdec.ptr, align 4
   %add3 = add nsw i32 %1, 1
-  %incdec.ptr4 = getelementptr inbounds i32, i32* %dst, i64 2
-  store i32 %add3, i32* %incdec.ptr1, align 4
-  %incdec.ptr5 = getelementptr inbounds i32, i32* %src, i64 3
-  %2 = load i32, i32* %incdec.ptr2, align 4
+  %incdec.ptr4 = getelementptr inbounds i32, ptr %dst, i64 2
+  store i32 %add3, ptr %incdec.ptr1, align 4
+  %incdec.ptr5 = getelementptr inbounds i32, ptr %src, i64 3
+  %2 = load i32, ptr %incdec.ptr2, align 4
   %add6 = add nsw i32 %2, 2
-  %incdec.ptr7 = getelementptr inbounds i32, i32* %dst, i64 3
-  store i32 %add6, i32* %incdec.ptr4, align 4
-  %3 = load i32, i32* %incdec.ptr5, align 4
+  %incdec.ptr7 = getelementptr inbounds i32, ptr %dst, i64 3
+  store i32 %add6, ptr %incdec.ptr4, align 4
+  %3 = load i32, ptr %incdec.ptr5, align 4
   %add9 = add nsw i32 %3, 3
-  store i32 %add9, i32* %incdec.ptr7, align 4
+  store i32 %add9, ptr %incdec.ptr7, align 4
   ret void
 }
 
-define void @add1(i32* noalias %dst, i32* noalias %src) {
+define void @add1(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @add1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[SRC]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 1
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[DST]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 3
-; CHECK-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[INCDEC_PTR]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
+; CHECK-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 1, i32 2>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[INCDEC_PTR1]] to <2 x i32>*
-; CHECK-NEXT:    store <2 x i32> [[TMP3]], <2 x i32>* [[TMP4]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[INCDEC_PTR5]], align 4
+; CHECK-NEXT:    store <2 x i32> [[TMP3]], ptr [[INCDEC_PTR1]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[INCDEC_PTR5]], align 4
 ; CHECK-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP5]], 3
-; CHECK-NEXT:    store i32 [[ADD9]], i32* [[INCDEC_PTR7]], align 4
+; CHECK-NEXT:    store i32 [[ADD9]], ptr [[INCDEC_PTR7]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %incdec.ptr = getelementptr inbounds i32, i32* %src, i64 1
-  %0 = load i32, i32* %src, align 4
-  %incdec.ptr1 = getelementptr inbounds i32, i32* %dst, i64 1
-  store i32 %0, i32* %dst, align 4
-  %incdec.ptr2 = getelementptr inbounds i32, i32* %src, i64 2
-  %1 = load i32, i32* %incdec.ptr, align 4
+  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
+  %0 = load i32, ptr %src, align 4
+  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
+  store i32 %0, ptr %dst, align 4
+  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
+  %1 = load i32, ptr %incdec.ptr, align 4
   %add3 = add nsw i32 %1, 1
-  %incdec.ptr4 = getelementptr inbounds i32, i32* %dst, i64 2
-  store i32 %add3, i32* %incdec.ptr1, align 4
-  %incdec.ptr5 = getelementptr inbounds i32, i32* %src, i64 3
-  %2 = load i32, i32* %incdec.ptr2, align 4
+  %incdec.ptr4 = getelementptr inbounds i32, ptr %dst, i64 2
+  store i32 %add3, ptr %incdec.ptr1, align 4
+  %incdec.ptr5 = getelementptr inbounds i32, ptr %src, i64 3
+  %2 = load i32, ptr %incdec.ptr2, align 4
   %add6 = add nsw i32 %2, 2
-  %incdec.ptr7 = getelementptr inbounds i32, i32* %dst, i64 3
-  store i32 %add6, i32* %incdec.ptr4, align 4
-  %3 = load i32, i32* %incdec.ptr5, align 4
+  %incdec.ptr7 = getelementptr inbounds i32, ptr %dst, i64 3
+  store i32 %add6, ptr %incdec.ptr4, align 4
+  %3 = load i32, ptr %incdec.ptr5, align 4
   %add9 = add nsw i32 %3, 3
-  store i32 %add9, i32* %incdec.ptr7, align 4
+  store i32 %add9, ptr %incdec.ptr7, align 4
   ret void
 }
 
-define void @sub0(i32* noalias %dst, i32* noalias %src) {
+define void @sub0(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @sub0(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[SRC]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
 ; CHECK-NEXT:    [[SUB:%.*]] = add nsw i32 [[TMP0]], -1
-; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 1
-; CHECK-NEXT:    store i32 [[SUB]], i32* [[DST]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 2
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 2
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[INCDEC_PTR1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[INCDEC_PTR2]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
+; CHECK-NEXT:    store i32 [[SUB]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[INCDEC_PTR]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 2
+; CHECK-NEXT:    store i32 [[TMP1]], ptr [[INCDEC_PTR1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <2 x i32> [[TMP3]], <i32 -2, i32 -3>
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[INCDEC_PTR3]] to <2 x i32>*
-; CHECK-NEXT:    store <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]], align 4
+; CHECK-NEXT:    store <2 x i32> [[TMP4]], ptr [[INCDEC_PTR3]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %incdec.ptr = getelementptr inbounds i32, i32* %src, i64 1
-  %0 = load i32, i32* %src, align 4
+  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
+  %0 = load i32, ptr %src, align 4
   %sub = add nsw i32 %0, -1
-  %incdec.ptr1 = getelementptr inbounds i32, i32* %dst, i64 1
-  store i32 %sub, i32* %dst, align 4
-  %incdec.ptr2 = getelementptr inbounds i32, i32* %src, i64 2
-  %1 = load i32, i32* %incdec.ptr, align 4
-  %incdec.ptr3 = getelementptr inbounds i32, i32* %dst, i64 2
-  store i32 %1, i32* %incdec.ptr1, align 4
-  %incdec.ptr4 = getelementptr inbounds i32, i32* %src, i64 3
-  %2 = load i32, i32* %incdec.ptr2, align 4
+  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
+  store i32 %sub, ptr %dst, align 4
+  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
+  %1 = load i32, ptr %incdec.ptr, align 4
+  %incdec.ptr3 = getelementptr inbounds i32, ptr %dst, i64 2
+  store i32 %1, ptr %incdec.ptr1, align 4
+  %incdec.ptr4 = getelementptr inbounds i32, ptr %src, i64 3
+  %2 = load i32, ptr %incdec.ptr2, align 4
   %sub5 = add nsw i32 %2, -2
-  %incdec.ptr6 = getelementptr inbounds i32, i32* %dst, i64 3
-  store i32 %sub5, i32* %incdec.ptr3, align 4
-  %3 = load i32, i32* %incdec.ptr4, align 4
+  %incdec.ptr6 = getelementptr inbounds i32, ptr %dst, i64 3
+  store i32 %sub5, ptr %incdec.ptr3, align 4
+  %3 = load i32, ptr %incdec.ptr4, align 4
   %sub8 = add nsw i32 %3, -3
-  store i32 %sub8, i32* %incdec.ptr6, align 4
+  store i32 %sub8, ptr %incdec.ptr6, align 4
   ret void
 }
 
-define void @sub1(i32* noalias %dst, i32* noalias %src) {
+define void @sub1(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @sub1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[SRC:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], <i32 4, i32 -1, i32 -2, i32 -3>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[DST:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr [[DST:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %incdec.ptr = getelementptr inbounds i32, i32* %src, i64 1
-  %0 = load i32, i32* %src, align 4
+  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
+  %0 = load i32, ptr %src, align 4
   %add = add nsw i32 %0, 4
-  %incdec.ptr1 = getelementptr inbounds i32, i32* %dst, i64 1
-  store i32 %add, i32* %dst, align 4
-  %incdec.ptr2 = getelementptr inbounds i32, i32* %src, i64 2
-  %1 = load i32, i32* %incdec.ptr, align 4
+  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
+  store i32 %add, ptr %dst, align 4
+  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
+  %1 = load i32, ptr %incdec.ptr, align 4
   %sub = add nsw i32 %1, -1
-  %incdec.ptr3 = getelementptr inbounds i32, i32* %dst, i64 2
-  store i32 %sub, i32* %incdec.ptr1, align 4
-  %incdec.ptr4 = getelementptr inbounds i32, i32* %src, i64 3
-  %2 = load i32, i32* %incdec.ptr2, align 4
+  %incdec.ptr3 = getelementptr inbounds i32, ptr %dst, i64 2
+  store i32 %sub, ptr %incdec.ptr1, align 4
+  %incdec.ptr4 = getelementptr inbounds i32, ptr %src, i64 3
+  %2 = load i32, ptr %incdec.ptr2, align 4
   %sub5 = add nsw i32 %2, -2
-  %incdec.ptr6 = getelementptr inbounds i32, i32* %dst, i64 3
-  store i32 %sub5, i32* %incdec.ptr3, align 4
-  %3 = load i32, i32* %incdec.ptr4, align 4
+  %incdec.ptr6 = getelementptr inbounds i32, ptr %dst, i64 3
+  store i32 %sub5, ptr %incdec.ptr3, align 4
+  %3 = load i32, ptr %incdec.ptr4, align 4
   %sub8 = add nsw i32 %3, -3
-  store i32 %sub8, i32* %incdec.ptr6, align 4
+  store i32 %sub8, ptr %incdec.ptr6, align 4
   ret void
 }
 
-define void @sub2(i32* noalias %dst, i32* noalias %src) {
+define void @sub2(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @sub2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[SRC:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], <i32 -1, i32 -1, i32 -2, i32 -3>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[DST:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr [[DST:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %incdec.ptr = getelementptr inbounds i32, i32* %src, i64 1
-  %0 = load i32, i32* %src, align 4
+  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
+  %0 = load i32, ptr %src, align 4
   %sub = add nsw i32 %0, -1
-  %incdec.ptr1 = getelementptr inbounds i32, i32* %dst, i64 1
-  store i32 %sub, i32* %dst, align 4
-  %incdec.ptr2 = getelementptr inbounds i32, i32* %src, i64 2
-  %1 = load i32, i32* %incdec.ptr, align 4
+  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
+  store i32 %sub, ptr %dst, align 4
+  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
+  %1 = load i32, ptr %incdec.ptr, align 4
   %sub3 = add nsw i32 %1, -1
-  %incdec.ptr4 = getelementptr inbounds i32, i32* %dst, i64 2
-  store i32 %sub3, i32* %incdec.ptr1, align 4
-  %incdec.ptr5 = getelementptr inbounds i32, i32* %src, i64 3
-  %2 = load i32, i32* %incdec.ptr2, align 4
+  %incdec.ptr4 = getelementptr inbounds i32, ptr %dst, i64 2
+  store i32 %sub3, ptr %incdec.ptr1, align 4
+  %incdec.ptr5 = getelementptr inbounds i32, ptr %src, i64 3
+  %2 = load i32, ptr %incdec.ptr2, align 4
   %sub6 = add nsw i32 %2, -2
-  %incdec.ptr7 = getelementptr inbounds i32, i32* %dst, i64 3
-  store i32 %sub6, i32* %incdec.ptr4, align 4
-  %3 = load i32, i32* %incdec.ptr5, align 4
+  %incdec.ptr7 = getelementptr inbounds i32, ptr %dst, i64 3
+  store i32 %sub6, ptr %incdec.ptr4, align 4
+  %3 = load i32, ptr %incdec.ptr5, align 4
   %sub9 = add nsw i32 %3, -3
-  store i32 %sub9, i32* %incdec.ptr7, align 4
+  store i32 %sub9, ptr %incdec.ptr7, align 4
   ret void
 }
 
-define void @addsub0(i32* noalias %dst, i32* noalias %src) {
+define void @addsub0(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @addsub0(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[SRC]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
 ; CHECK-NEXT:    [[SUB:%.*]] = add nsw i32 [[TMP0]], -1
-; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 1
-; CHECK-NEXT:    store i32 [[SUB]], i32* [[DST]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 2
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 2
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[INCDEC_PTR1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[INCDEC_PTR2]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
+; CHECK-NEXT:    store i32 [[SUB]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[INCDEC_PTR]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 2
+; CHECK-NEXT:    store i32 [[TMP1]], ptr [[INCDEC_PTR1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <2 x i32> [[TMP3]], <i32 -2, i32 -3>
 ; CHECK-NEXT:    [[TMP5:%.*]] = sub nsw <2 x i32> [[TMP3]], <i32 -2, i32 -3>
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[INCDEC_PTR3]] to <2 x i32>*
-; CHECK-NEXT:    store <2 x i32> [[TMP6]], <2 x i32>* [[TMP7]], align 4
+; CHECK-NEXT:    store <2 x i32> [[TMP6]], ptr [[INCDEC_PTR3]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %incdec.ptr = getelementptr inbounds i32, i32* %src, i64 1
-  %0 = load i32, i32* %src, align 4
+  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
+  %0 = load i32, ptr %src, align 4
   %sub = add nsw i32 %0, -1
-  %incdec.ptr1 = getelementptr inbounds i32, i32* %dst, i64 1
-  store i32 %sub, i32* %dst, align 4
-  %incdec.ptr2 = getelementptr inbounds i32, i32* %src, i64 2
-  %1 = load i32, i32* %incdec.ptr, align 4
-  %incdec.ptr3 = getelementptr inbounds i32, i32* %dst, i64 2
-  store i32 %1, i32* %incdec.ptr1, align 4
-  %incdec.ptr4 = getelementptr inbounds i32, i32* %src, i64 3
-  %2 = load i32, i32* %incdec.ptr2, align 4
+  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
+  store i32 %sub, ptr %dst, align 4
+  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
+  %1 = load i32, ptr %incdec.ptr, align 4
+  %incdec.ptr3 = getelementptr inbounds i32, ptr %dst, i64 2
+  store i32 %1, ptr %incdec.ptr1, align 4
+  %incdec.ptr4 = getelementptr inbounds i32, ptr %src, i64 3
+  %2 = load i32, ptr %incdec.ptr2, align 4
   %sub5 = add nsw i32 %2, -2
-  %incdec.ptr6 = getelementptr inbounds i32, i32* %dst, i64 3
-  store i32 %sub5, i32* %incdec.ptr3, align 4
-  %3 = load i32, i32* %incdec.ptr4, align 4
+  %incdec.ptr6 = getelementptr inbounds i32, ptr %dst, i64 3
+  store i32 %sub5, ptr %incdec.ptr3, align 4
+  %3 = load i32, ptr %incdec.ptr4, align 4
   %sub8 = sub nsw i32 %3, -3
-  store i32 %sub8, i32* %incdec.ptr6, align 4
+  store i32 %sub8, ptr %incdec.ptr6, align 4
   ret void
 }
 
-define void @addsub1(i32* noalias %dst, i32* noalias %src) {
+define void @addsub1(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @addsub1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i64 2
-; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 2
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[SRC]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 2
+; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[SRC]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <2 x i32> [[TMP1]], <i32 -1, i32 -1>
 ; CHECK-NEXT:    [[TMP3:%.*]] = sub nsw <2 x i32> [[TMP1]], <i32 -1, i32 -1>
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[DST]] to <2 x i32>*
-; CHECK-NEXT:    store <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 3
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[INCDEC_PTR2]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR6:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3
-; CHECK-NEXT:    store i32 [[TMP6]], i32* [[INCDEC_PTR3]], align 4
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[INCDEC_PTR4]], align 4
+; CHECK-NEXT:    store <2 x i32> [[TMP4]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[INCDEC_PTR2]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
+; CHECK-NEXT:    store i32 [[TMP6]], ptr [[INCDEC_PTR3]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[INCDEC_PTR4]], align 4
 ; CHECK-NEXT:    [[SUB8:%.*]] = sub nsw i32 [[TMP7]], -3
-; CHECK-NEXT:    store i32 [[SUB8]], i32* [[INCDEC_PTR6]], align 4
+; CHECK-NEXT:    store i32 [[SUB8]], ptr [[INCDEC_PTR6]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %incdec.ptr = getelementptr inbounds i32, i32* %src, i64 1
-  %0 = load i32, i32* %src, align 4
+  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
+  %0 = load i32, ptr %src, align 4
   %sub = add nsw i32 %0, -1
-  %incdec.ptr1 = getelementptr inbounds i32, i32* %dst, i64 1
-  store i32 %sub, i32* %dst, align 4
-  %incdec.ptr2 = getelementptr inbounds i32, i32* %src, i64 2
-  %1 = load i32, i32* %incdec.ptr, align 4
+  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
+  store i32 %sub, ptr %dst, align 4
+  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
+  %1 = load i32, ptr %incdec.ptr, align 4
   %sub1 = sub nsw i32 %1, -1
-  %incdec.ptr3 = getelementptr inbounds i32, i32* %dst, i64 2
-  store i32 %sub1, i32* %incdec.ptr1, align 4
-  %incdec.ptr4 = getelementptr inbounds i32, i32* %src, i64 3
-  %2 = load i32, i32* %incdec.ptr2, align 4
-  %incdec.ptr6 = getelementptr inbounds i32, i32* %dst, i64 3
-  store i32 %2, i32* %incdec.ptr3, align 4
-  %3 = load i32, i32* %incdec.ptr4, align 4
+  %incdec.ptr3 = getelementptr inbounds i32, ptr %dst, i64 2
+  store i32 %sub1, ptr %incdec.ptr1, align 4
+  %incdec.ptr4 = getelementptr inbounds i32, ptr %src, i64 3
+  %2 = load i32, ptr %incdec.ptr2, align 4
+  %incdec.ptr6 = getelementptr inbounds i32, ptr %dst, i64 3
+  store i32 %2, ptr %incdec.ptr3, align 4
+  %3 = load i32, ptr %incdec.ptr4, align 4
   %sub8 = sub nsw i32 %3, -3
-  store i32 %sub8, i32* %incdec.ptr6, align 4
+  store i32 %sub8, ptr %incdec.ptr6, align 4
   ret void
 }
 
-define void @mul(i32* noalias %dst, i32* noalias %src) {
+define void @mul(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @mul(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i64 2
-; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 2
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[SRC]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 2
+; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[SRC]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = mul nsw <2 x i32> [[TMP1]], <i32 257, i32 -3>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[DST]] to <2 x i32>*
-; CHECK-NEXT:    store <2 x i32> [[TMP2]], <2 x i32>* [[TMP3]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 3
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[INCDEC_PTR2]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3
-; CHECK-NEXT:    store i32 [[TMP4]], i32* [[INCDEC_PTR4]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[INCDEC_PTR5]], align 4
+; CHECK-NEXT:    store <2 x i32> [[TMP2]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[INCDEC_PTR2]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
+; CHECK-NEXT:    store i32 [[TMP4]], ptr [[INCDEC_PTR4]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[INCDEC_PTR5]], align 4
 ; CHECK-NEXT:    [[MUL9:%.*]] = mul nsw i32 [[TMP5]], -9
-; CHECK-NEXT:    store i32 [[MUL9]], i32* [[INCDEC_PTR7]], align 4
+; CHECK-NEXT:    store i32 [[MUL9]], ptr [[INCDEC_PTR7]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %incdec.ptr = getelementptr inbounds i32, i32* %src, i64 1
-  %0 = load i32, i32* %src, align 4
+  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
+  %0 = load i32, ptr %src, align 4
   %mul = mul nsw i32 %0, 257
-  %incdec.ptr1 = getelementptr inbounds i32, i32* %dst, i64 1
-  store i32 %mul, i32* %dst, align 4
-  %incdec.ptr2 = getelementptr inbounds i32, i32* %src, i64 2
-  %1 = load i32, i32* %incdec.ptr, align 4
+  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
+  store i32 %mul, ptr %dst, align 4
+  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
+  %1 = load i32, ptr %incdec.ptr, align 4
   %mul3 = mul nsw i32 %1, -3
-  %incdec.ptr4 = getelementptr inbounds i32, i32* %dst, i64 2
-  store i32 %mul3, i32* %incdec.ptr1, align 4
-  %incdec.ptr5 = getelementptr inbounds i32, i32* %src, i64 3
-  %2 = load i32, i32* %incdec.ptr2, align 4
-  %incdec.ptr7 = getelementptr inbounds i32, i32* %dst, i64 3
-  store i32 %2, i32* %incdec.ptr4, align 4
-  %3 = load i32, i32* %incdec.ptr5, align 4
+  %incdec.ptr4 = getelementptr inbounds i32, ptr %dst, i64 2
+  store i32 %mul3, ptr %incdec.ptr1, align 4
+  %incdec.ptr5 = getelementptr inbounds i32, ptr %src, i64 3
+  %2 = load i32, ptr %incdec.ptr2, align 4
+  %incdec.ptr7 = getelementptr inbounds i32, ptr %dst, i64 3
+  store i32 %2, ptr %incdec.ptr4, align 4
+  %3 = load i32, ptr %incdec.ptr5, align 4
   %mul9 = mul nsw i32 %3, -9
-  store i32 %mul9, i32* %incdec.ptr7, align 4
+  store i32 %mul9, ptr %incdec.ptr7, align 4
   ret void
 }
 
-define void @shl0(i32* noalias %dst, i32* noalias %src) {
+define void @shl0(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @shl0(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[SRC]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 1
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[DST]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 3
-; CHECK-NEXT:    [[INCDEC_PTR6:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[INCDEC_PTR]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 1
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
+; CHECK-NEXT:    [[INCDEC_PTR6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[INCDEC_PTR]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i32> [[TMP2]], <i32 1, i32 2>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[INCDEC_PTR1]] to <2 x i32>*
-; CHECK-NEXT:    store <2 x i32> [[TMP3]], <2 x i32>* [[TMP4]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[INCDEC_PTR4]], align 4
+; CHECK-NEXT:    store <2 x i32> [[TMP3]], ptr [[INCDEC_PTR1]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[INCDEC_PTR4]], align 4
 ; CHECK-NEXT:    [[SHL8:%.*]] = shl i32 [[TMP5]], 3
-; CHECK-NEXT:    store i32 [[SHL8]], i32* [[INCDEC_PTR6]], align 4
+; CHECK-NEXT:    store i32 [[SHL8]], ptr [[INCDEC_PTR6]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %incdec.ptr = getelementptr inbounds i32, i32* %src, i64 1
-  %0 = load i32, i32* %src, align 4
-  %incdec.ptr1 = getelementptr inbounds i32, i32* %dst, i64 1
-  store i32 %0, i32* %dst, align 4
-  %incdec.ptr2 = getelementptr inbounds i32, i32* %src, i64 2
-  %1 = load i32, i32* %incdec.ptr, align 4
+  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
+  %0 = load i32, ptr %src, align 4
+  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
+  store i32 %0, ptr %dst, align 4
+  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
+  %1 = load i32, ptr %incdec.ptr, align 4
   %shl = shl i32 %1, 1
-  %incdec.ptr3 = getelementptr inbounds i32, i32* %dst, i64 2
-  store i32 %shl, i32* %incdec.ptr1, align 4
-  %incdec.ptr4 = getelementptr inbounds i32, i32* %src, i64 3
-  %2 = load i32, i32* %incdec.ptr2, align 4
+  %incdec.ptr3 = getelementptr inbounds i32, ptr %dst, i64 2
+  store i32 %shl, ptr %incdec.ptr1, align 4
+  %incdec.ptr4 = getelementptr inbounds i32, ptr %src, i64 3
+  %2 = load i32, ptr %incdec.ptr2, align 4
   %shl5 = shl i32 %2, 2
-  %incdec.ptr6 = getelementptr inbounds i32, i32* %dst, i64 3
-  store i32 %shl5, i32* %incdec.ptr3, align 4
-  %3 = load i32, i32* %incdec.ptr4, align 4
+  %incdec.ptr6 = getelementptr inbounds i32, ptr %dst, i64 3
+  store i32 %shl5, ptr %incdec.ptr3, align 4
+  %3 = load i32, ptr %incdec.ptr4, align 4
   %shl8 = shl i32 %3, 3
-  store i32 %shl8, i32* %incdec.ptr6, align 4
+  store i32 %shl8, ptr %incdec.ptr6, align 4
   ret void
 }
 
-define void @shl1(i32* noalias %dst, i32* noalias %src) {
+define void @shl1(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @shl1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[SRC:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[TMP1]], <i32 7, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[DST:%.*]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr [[DST:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %incdec.ptr = getelementptr inbounds i32, i32* %src, i64 1
-  %0 = load i32, i32* %src, align 4
+  %incdec.ptr = getelementptr inbounds i32, ptr %src, i64 1
+  %0 = load i32, ptr %src, align 4
   %shl = shl i32 %0, 7
-  %incdec.ptr1 = getelementptr inbounds i32, i32* %dst, i64 1
-  store i32 %shl, i32* %dst, align 4
-  %incdec.ptr2 = getelementptr inbounds i32, i32* %src, i64 2
-  %1 = load i32, i32* %incdec.ptr, align 4
+  %incdec.ptr1 = getelementptr inbounds i32, ptr %dst, i64 1
+  store i32 %shl, ptr %dst, align 4
+  %incdec.ptr2 = getelementptr inbounds i32, ptr %src, i64 2
+  %1 = load i32, ptr %incdec.ptr, align 4
   %shl3 = shl i32 %1, 1
-  %incdec.ptr4 = getelementptr inbounds i32, i32* %dst, i64 2
-  store i32 %shl3, i32* %incdec.ptr1, align 4
-  %incdec.ptr5 = getelementptr inbounds i32, i32* %src, i64 3
-  %2 = load i32, i32* %incdec.ptr2, align 4
+  %incdec.ptr4 = getelementptr inbounds i32, ptr %dst, i64 2
+  store i32 %shl3, ptr %incdec.ptr1, align 4
+  %incdec.ptr5 = getelementptr inbounds i32, ptr %src, i64 3
+  %2 = load i32, ptr %incdec.ptr2, align 4
   %shl6 = shl i32 %2, 2
-  %incdec.ptr7 = getelementptr inbounds i32, i32* %dst, i64 3
-  store i32 %shl6, i32* %incdec.ptr4, align 4
-  %3 = load i32, i32* %incdec.ptr5, align 4
+  %incdec.ptr7 = getelementptr inbounds i32, ptr %dst, i64 3
+  store i32 %shl6, ptr %incdec.ptr4, align 4
+  %3 = load i32, ptr %incdec.ptr5, align 4
   %shl9 = shl i32 %3, 3
-  store i32 %shl9, i32* %incdec.ptr7, align 4
+  store i32 %shl9, ptr %incdec.ptr7, align 4
   ret void
 }
 
-define void @add0f(float* noalias %dst, float* noalias %src) {
+define void @add0f(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @add0f(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[SRC:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[SRC:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <4 x float> [[TMP1]], <float 1.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[DST:%.*]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[DST:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %incdec.ptr = getelementptr inbounds float, float* %src, i64 1
-  %0 = load float, float* %src, align 4
+  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
+  %0 = load float, ptr %src, align 4
   %add = fadd fast float %0, 1.000000e+00
-  %incdec.ptr1 = getelementptr inbounds float, float* %dst, i64 1
-  store float %add, float* %dst, align 4
-  %incdec.ptr2 = getelementptr inbounds float, float* %src, i64 2
-  %1 = load float, float* %incdec.ptr, align 4
+  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
+  store float %add, ptr %dst, align 4
+  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
+  %1 = load float, ptr %incdec.ptr, align 4
   %add3 = fadd fast float %1, 1.000000e+00
-  %incdec.ptr4 = getelementptr inbounds float, float* %dst, i64 2
-  store float %add3, float* %incdec.ptr1, align 4
-  %incdec.ptr5 = getelementptr inbounds float, float* %src, i64 3
-  %2 = load float, float* %incdec.ptr2, align 4
+  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
+  store float %add3, ptr %incdec.ptr1, align 4
+  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
+  %2 = load float, ptr %incdec.ptr2, align 4
   %add6 = fadd fast float %2, 2.000000e+00
-  %incdec.ptr7 = getelementptr inbounds float, float* %dst, i64 3
-  store float %add6, float* %incdec.ptr4, align 4
-  %3 = load float, float* %incdec.ptr5, align 4
+  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
+  store float %add6, ptr %incdec.ptr4, align 4
+  %3 = load float, ptr %incdec.ptr5, align 4
   %add9 = fadd fast float %3, 3.000000e+00
-  store float %add9, float* %incdec.ptr7, align 4
+  store float %add9, ptr %incdec.ptr7, align 4
   ret void
 }
 
-define void @add1f(float* noalias %dst, float* noalias %src) {
+define void @add1f(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @add1f(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[SRC]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, float* [[DST:%.*]], i64 1
-; CHECK-NEXT:    store float [[TMP0]], float* [[DST]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds float, float* [[SRC]], i64 3
-; CHECK-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[INCDEC_PTR]] to <2 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[SRC]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 1
+; CHECK-NEXT:    store float [[TMP0]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3
+; CHECK-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[INCDEC_PTR]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast <2 x float> [[TMP2]], <float 1.000000e+00, float 2.000000e+00>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[INCDEC_PTR1]] to <2 x float>*
-; CHECK-NEXT:    store <2 x float> [[TMP3]], <2 x float>* [[TMP4]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = load float, float* [[INCDEC_PTR5]], align 4
+; CHECK-NEXT:    store <2 x float> [[TMP3]], ptr [[INCDEC_PTR1]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[INCDEC_PTR5]], align 4
 ; CHECK-NEXT:    [[ADD9:%.*]] = fadd fast float [[TMP5]], 3.000000e+00
-; CHECK-NEXT:    store float [[ADD9]], float* [[INCDEC_PTR7]], align 4
+; CHECK-NEXT:    store float [[ADD9]], ptr [[INCDEC_PTR7]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %incdec.ptr = getelementptr inbounds float, float* %src, i64 1
-  %0 = load float, float* %src, align 4
-  %incdec.ptr1 = getelementptr inbounds float, float* %dst, i64 1
-  store float %0, float* %dst, align 4
-  %incdec.ptr2 = getelementptr inbounds float, float* %src, i64 2
-  %1 = load float, float* %incdec.ptr, align 4
+  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
+  %0 = load float, ptr %src, align 4
+  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
+  store float %0, ptr %dst, align 4
+  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
+  %1 = load float, ptr %incdec.ptr, align 4
   %add3 = fadd fast float %1, 1.000000e+00
-  %incdec.ptr4 = getelementptr inbounds float, float* %dst, i64 2
-  store float %add3, float* %incdec.ptr1, align 4
-  %incdec.ptr5 = getelementptr inbounds float, float* %src, i64 3
-  %2 = load float, float* %incdec.ptr2, align 4
+  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
+  store float %add3, ptr %incdec.ptr1, align 4
+  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
+  %2 = load float, ptr %incdec.ptr2, align 4
   %add6 = fadd fast float %2, 2.000000e+00
-  %incdec.ptr7 = getelementptr inbounds float, float* %dst, i64 3
-  store float %add6, float* %incdec.ptr4, align 4
-  %3 = load float, float* %incdec.ptr5, align 4
+  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
+  store float %add6, ptr %incdec.ptr4, align 4
+  %3 = load float, ptr %incdec.ptr5, align 4
   %add9 = fadd fast float %3, 3.000000e+00
-  store float %add9, float* %incdec.ptr7, align 4
+  store float %add9, ptr %incdec.ptr7, align 4
   ret void
 }
 
-define void @sub0f(float* noalias %dst, float* noalias %src) {
+define void @sub0f(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @sub0f(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[SRC]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[SRC]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP0]], -1.000000e+00
-; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, float* [[DST:%.*]], i64 1
-; CHECK-NEXT:    store float [[ADD]], float* [[DST]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, float* [[SRC]], i64 2
-; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[INCDEC_PTR]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, float* [[DST]], i64 2
-; CHECK-NEXT:    store float [[TMP1]], float* [[INCDEC_PTR1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[INCDEC_PTR2]] to <2 x float>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 1
+; CHECK-NEXT:    store float [[ADD]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[INCDEC_PTR]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 2
+; CHECK-NEXT:    store float [[TMP1]], ptr [[INCDEC_PTR1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, ptr [[INCDEC_PTR2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast <2 x float> [[TMP3]], <float -2.000000e+00, float -3.000000e+00>
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast float* [[INCDEC_PTR4]] to <2 x float>*
-; CHECK-NEXT:    store <2 x float> [[TMP4]], <2 x float>* [[TMP5]], align 4
+; CHECK-NEXT:    store <2 x float> [[TMP4]], ptr [[INCDEC_PTR4]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %incdec.ptr = getelementptr inbounds float, float* %src, i64 1
-  %0 = load float, float* %src, align 4
+  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
+  %0 = load float, ptr %src, align 4
   %add = fadd fast float %0, -1.000000e+00
-  %incdec.ptr1 = getelementptr inbounds float, float* %dst, i64 1
-  store float %add, float* %dst, align 4
-  %incdec.ptr2 = getelementptr inbounds float, float* %src, i64 2
-  %1 = load float, float* %incdec.ptr, align 4
-  %incdec.ptr4 = getelementptr inbounds float, float* %dst, i64 2
-  store float %1, float* %incdec.ptr1, align 4
-  %incdec.ptr5 = getelementptr inbounds float, float* %src, i64 3
-  %2 = load float, float* %incdec.ptr2, align 4
+  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
+  store float %add, ptr %dst, align 4
+  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
+  %1 = load float, ptr %incdec.ptr, align 4
+  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
+  store float %1, ptr %incdec.ptr1, align 4
+  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
+  %2 = load float, ptr %incdec.ptr2, align 4
   %add6 = fadd fast float %2, -2.000000e+00
-  %incdec.ptr7 = getelementptr inbounds float, float* %dst, i64 3
-  store float %add6, float* %incdec.ptr4, align 4
-  %3 = load float, float* %incdec.ptr5, align 4
+  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
+  store float %add6, ptr %incdec.ptr4, align 4
+  %3 = load float, ptr %incdec.ptr5, align 4
   %add9 = fadd fast float %3, -3.000000e+00
-  store float %add9, float* %incdec.ptr7, align 4
+  store float %add9, ptr %incdec.ptr7, align 4
   ret void
 }
 
-define void @sub1f(float* noalias %dst, float* noalias %src) {
+define void @sub1f(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @sub1f(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[SRC:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[SRC:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <4 x float> [[TMP1]], <float 4.000000e+00, float -1.000000e+00, float -2.000000e+00, float -3.000000e+00>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[DST:%.*]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[DST:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %incdec.ptr = getelementptr inbounds float, float* %src, i64 1
-  %0 = load float, float* %src, align 4
+  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
+  %0 = load float, ptr %src, align 4
   %add = fadd fast float %0, 4.000000e+00
-  %incdec.ptr1 = getelementptr inbounds float, float* %dst, i64 1
-  store float %add, float* %dst, align 4
-  %incdec.ptr2 = getelementptr inbounds float, float* %src, i64 2
-  %1 = load float, float* %incdec.ptr, align 4
+  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
+  store float %add, ptr %dst, align 4
+  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
+  %1 = load float, ptr %incdec.ptr, align 4
   %sub = fadd fast float %1, -1.000000e+00
-  %incdec.ptr3 = getelementptr inbounds float, float* %dst, i64 2
-  store float %sub, float* %incdec.ptr1, align 4
-  %incdec.ptr4 = getelementptr inbounds float, float* %src, i64 3
-  %2 = load float, float* %incdec.ptr2, align 4
+  %incdec.ptr3 = getelementptr inbounds float, ptr %dst, i64 2
+  store float %sub, ptr %incdec.ptr1, align 4
+  %incdec.ptr4 = getelementptr inbounds float, ptr %src, i64 3
+  %2 = load float, ptr %incdec.ptr2, align 4
   %sub5 = fadd fast float %2, -2.000000e+00
-  %incdec.ptr6 = getelementptr inbounds float, float* %dst, i64 3
-  store float %sub5, float* %incdec.ptr3, align 4
-  %3 = load float, float* %incdec.ptr4, align 4
+  %incdec.ptr6 = getelementptr inbounds float, ptr %dst, i64 3
+  store float %sub5, ptr %incdec.ptr3, align 4
+  %3 = load float, ptr %incdec.ptr4, align 4
   %sub8 = fadd fast float %3, -3.000000e+00
-  store float %sub8, float* %incdec.ptr6, align 4
+  store float %sub8, ptr %incdec.ptr6, align 4
   ret void
 }
 
-define void @sub2f(float* noalias %dst, float* noalias %src) {
+define void @sub2f(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @sub2f(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[SRC:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[SRC:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <4 x float> [[TMP1]], <float -1.000000e+00, float -1.000000e+00, float -2.000000e+00, float -3.000000e+00>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[DST:%.*]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[DST:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %incdec.ptr = getelementptr inbounds float, float* %src, i64 1
-  %0 = load float, float* %src, align 4
+  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
+  %0 = load float, ptr %src, align 4
   %sub = fadd fast float %0, -1.000000e+00
-  %incdec.ptr1 = getelementptr inbounds float, float* %dst, i64 1
-  store float %sub, float* %dst, align 4
-  %incdec.ptr2 = getelementptr inbounds float, float* %src, i64 2
-  %1 = load float, float* %incdec.ptr, align 4
+  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
+  store float %sub, ptr %dst, align 4
+  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
+  %1 = load float, ptr %incdec.ptr, align 4
   %sub3 = fadd fast float %1, -1.000000e+00
-  %incdec.ptr4 = getelementptr inbounds float, float* %dst, i64 2
-  store float %sub3, float* %incdec.ptr1, align 4
-  %incdec.ptr5 = getelementptr inbounds float, float* %src, i64 3
-  %2 = load float, float* %incdec.ptr2, align 4
+  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
+  store float %sub3, ptr %incdec.ptr1, align 4
+  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
+  %2 = load float, ptr %incdec.ptr2, align 4
   %sub6 = fadd fast float %2, -2.000000e+00
-  %incdec.ptr7 = getelementptr inbounds float, float* %dst, i64 3
-  store float %sub6, float* %incdec.ptr4, align 4
-  %3 = load float, float* %incdec.ptr5, align 4
+  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
+  store float %sub6, ptr %incdec.ptr4, align 4
+  %3 = load float, ptr %incdec.ptr5, align 4
   %sub9 = fadd fast float %3, -3.000000e+00
-  store float %sub9, float* %incdec.ptr7, align 4
+  store float %sub9, ptr %incdec.ptr7, align 4
   ret void
 }
 
-define void @addsub0f(float* noalias %dst, float* noalias %src) {
+define void @addsub0f(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @addsub0f(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[SRC]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[SRC]], align 4
 ; CHECK-NEXT:    [[SUB:%.*]] = fadd fast float [[TMP0]], -1.000000e+00
-; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, float* [[DST:%.*]], i64 1
-; CHECK-NEXT:    store float [[SUB]], float* [[DST]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, float* [[SRC]], i64 2
-; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[INCDEC_PTR]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, float* [[DST]], i64 2
-; CHECK-NEXT:    store float [[TMP1]], float* [[INCDEC_PTR1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[INCDEC_PTR2]] to <2 x float>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 1
+; CHECK-NEXT:    store float [[SUB]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[INCDEC_PTR]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 2
+; CHECK-NEXT:    store float [[TMP1]], ptr [[INCDEC_PTR1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, ptr [[INCDEC_PTR2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast <2 x float> [[TMP3]], <float -2.000000e+00, float -3.000000e+00>
 ; CHECK-NEXT:    [[TMP5:%.*]] = fsub fast <2 x float> [[TMP3]], <float -2.000000e+00, float -3.000000e+00>
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast float* [[INCDEC_PTR3]] to <2 x float>*
-; CHECK-NEXT:    store <2 x float> [[TMP6]], <2 x float>* [[TMP7]], align 4
+; CHECK-NEXT:    store <2 x float> [[TMP6]], ptr [[INCDEC_PTR3]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %incdec.ptr = getelementptr inbounds float, float* %src, i64 1
-  %0 = load float, float* %src, align 4
+  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
+  %0 = load float, ptr %src, align 4
   %sub = fadd fast float %0, -1.000000e+00
-  %incdec.ptr1 = getelementptr inbounds float, float* %dst, i64 1
-  store float %sub, float* %dst, align 4
-  %incdec.ptr2 = getelementptr inbounds float, float* %src, i64 2
-  %1 = load float, float* %incdec.ptr, align 4
-  %incdec.ptr3 = getelementptr inbounds float, float* %dst, i64 2
-  store float %1, float* %incdec.ptr1, align 4
-  %incdec.ptr4 = getelementptr inbounds float, float* %src, i64 3
-  %2 = load float, float* %incdec.ptr2, align 4
+  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
+  store float %sub, ptr %dst, align 4
+  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
+  %1 = load float, ptr %incdec.ptr, align 4
+  %incdec.ptr3 = getelementptr inbounds float, ptr %dst, i64 2
+  store float %1, ptr %incdec.ptr1, align 4
+  %incdec.ptr4 = getelementptr inbounds float, ptr %src, i64 3
+  %2 = load float, ptr %incdec.ptr2, align 4
   %sub5 = fadd fast float %2, -2.000000e+00
-  %incdec.ptr6 = getelementptr inbounds float, float* %dst, i64 3
-  store float %sub5, float* %incdec.ptr3, align 4
-  %3 = load float, float* %incdec.ptr4, align 4
+  %incdec.ptr6 = getelementptr inbounds float, ptr %dst, i64 3
+  store float %sub5, ptr %incdec.ptr3, align 4
+  %3 = load float, ptr %incdec.ptr4, align 4
   %sub8 = fsub fast float %3, -3.000000e+00
-  store float %sub8, float* %incdec.ptr6, align 4
+  store float %sub8, ptr %incdec.ptr6, align 4
   ret void
 }
 
-define void @addsub1f(float* noalias %dst, float* noalias %src) {
+define void @addsub1f(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @addsub1f(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 2
-; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, float* [[DST:%.*]], i64 2
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[SRC]] to <2 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 2
+; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[SRC]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <2 x float> [[TMP1]], <float -1.000000e+00, float -1.000000e+00>
 ; CHECK-NEXT:    [[TMP3:%.*]] = fsub fast <2 x float> [[TMP1]], <float -1.000000e+00, float -1.000000e+00>
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP3]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast float* [[DST]] to <2 x float>*
-; CHECK-NEXT:    store <2 x float> [[TMP4]], <2 x float>* [[TMP5]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, float* [[SRC]], i64 3
-; CHECK-NEXT:    [[TMP6:%.*]] = load float, float* [[INCDEC_PTR2]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR6:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3
-; CHECK-NEXT:    store float [[TMP6]], float* [[INCDEC_PTR3]], align 4
-; CHECK-NEXT:    [[TMP7:%.*]] = load float, float* [[INCDEC_PTR4]], align 4
+; CHECK-NEXT:    store <2 x float> [[TMP4]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3
+; CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[INCDEC_PTR2]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR6:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3
+; CHECK-NEXT:    store float [[TMP6]], ptr [[INCDEC_PTR3]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load float, ptr [[INCDEC_PTR4]], align 4
 ; CHECK-NEXT:    [[SUB8:%.*]] = fsub fast float [[TMP7]], -3.000000e+00
-; CHECK-NEXT:    store float [[SUB8]], float* [[INCDEC_PTR6]], align 4
+; CHECK-NEXT:    store float [[SUB8]], ptr [[INCDEC_PTR6]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %incdec.ptr = getelementptr inbounds float, float* %src, i64 1
-  %0 = load float, float* %src, align 4
+  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
+  %0 = load float, ptr %src, align 4
   %sub = fadd fast float %0, -1.000000e+00
-  %incdec.ptr1 = getelementptr inbounds float, float* %dst, i64 1
-  store float %sub, float* %dst, align 4
-  %incdec.ptr2 = getelementptr inbounds float, float* %src, i64 2
-  %1 = load float, float* %incdec.ptr, align 4
+  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
+  store float %sub, ptr %dst, align 4
+  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
+  %1 = load float, ptr %incdec.ptr, align 4
   %sub1 = fsub fast float %1, -1.000000e+00
-  %incdec.ptr3 = getelementptr inbounds float, float* %dst, i64 2
-  store float %sub1, float* %incdec.ptr1, align 4
-  %incdec.ptr4 = getelementptr inbounds float, float* %src, i64 3
-  %2 = load float, float* %incdec.ptr2, align 4
-  %incdec.ptr6 = getelementptr inbounds float, float* %dst, i64 3
-  store float %2, float* %incdec.ptr3, align 4
-  %3 = load float, float* %incdec.ptr4, align 4
+  %incdec.ptr3 = getelementptr inbounds float, ptr %dst, i64 2
+  store float %sub1, ptr %incdec.ptr1, align 4
+  %incdec.ptr4 = getelementptr inbounds float, ptr %src, i64 3
+  %2 = load float, ptr %incdec.ptr2, align 4
+  %incdec.ptr6 = getelementptr inbounds float, ptr %dst, i64 3
+  store float %2, ptr %incdec.ptr3, align 4
+  %3 = load float, ptr %incdec.ptr4, align 4
   %sub8 = fsub fast float %3, -3.000000e+00
-  store float %sub8, float* %incdec.ptr6, align 4
+  store float %sub8, ptr %incdec.ptr6, align 4
   ret void
 }
 
-define void @mulf(float* noalias %dst, float* noalias %src) {
+define void @mulf(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @mulf(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 2
-; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, float* [[DST:%.*]], i64 2
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[SRC]] to <2 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 2
+; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[SRC]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], <float 2.570000e+02, float -3.000000e+00>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[DST]] to <2 x float>*
-; CHECK-NEXT:    store <2 x float> [[TMP2]], <2 x float>* [[TMP3]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds float, float* [[SRC]], i64 3
-; CHECK-NEXT:    [[TMP4:%.*]] = load float, float* [[INCDEC_PTR2]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3
-; CHECK-NEXT:    store float [[TMP4]], float* [[INCDEC_PTR4]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = load float, float* [[INCDEC_PTR5]], align 4
+; CHECK-NEXT:    store <2 x float> [[TMP2]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3
+; CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[INCDEC_PTR2]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3
+; CHECK-NEXT:    store float [[TMP4]], ptr [[INCDEC_PTR4]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[INCDEC_PTR5]], align 4
 ; CHECK-NEXT:    [[SUB9:%.*]] = fmul fast float [[TMP5]], -9.000000e+00
-; CHECK-NEXT:    store float [[SUB9]], float* [[INCDEC_PTR7]], align 4
+; CHECK-NEXT:    store float [[SUB9]], ptr [[INCDEC_PTR7]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %incdec.ptr = getelementptr inbounds float, float* %src, i64 1
-  %0 = load float, float* %src, align 4
+  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
+  %0 = load float, ptr %src, align 4
   %sub = fmul fast float %0, 2.570000e+02
-  %incdec.ptr1 = getelementptr inbounds float, float* %dst, i64 1
-  store float %sub, float* %dst, align 4
-  %incdec.ptr2 = getelementptr inbounds float, float* %src, i64 2
-  %1 = load float, float* %incdec.ptr, align 4
+  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
+  store float %sub, ptr %dst, align 4
+  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
+  %1 = load float, ptr %incdec.ptr, align 4
   %sub3 = fmul fast float %1, -3.000000e+00
-  %incdec.ptr4 = getelementptr inbounds float, float* %dst, i64 2
-  store float %sub3, float* %incdec.ptr1, align 4
-  %incdec.ptr5 = getelementptr inbounds float, float* %src, i64 3
-  %2 = load float, float* %incdec.ptr2, align 4
-  %incdec.ptr7 = getelementptr inbounds float, float* %dst, i64 3
-  store float %2, float* %incdec.ptr4, align 4
-  %3 = load float, float* %incdec.ptr5, align 4
+  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
+  store float %sub3, ptr %incdec.ptr1, align 4
+  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
+  %2 = load float, ptr %incdec.ptr2, align 4
+  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
+  store float %2, ptr %incdec.ptr4, align 4
+  %3 = load float, ptr %incdec.ptr5, align 4
   %sub9 = fmul fast float %3, -9.000000e+00
-  store float %sub9, float* %incdec.ptr7, align 4
+  store float %sub9, ptr %incdec.ptr7, align 4
   ret void
 }
 
-define void @add0fn(float* noalias %dst, float* noalias %src) {
+define void @add0fn(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @add0fn(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[SRC:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[SRC:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], <float 1.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[DST:%.*]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[DST:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %incdec.ptr = getelementptr inbounds float, float* %src, i64 1
-  %0 = load float, float* %src, align 4
+  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
+  %0 = load float, ptr %src, align 4
   %add = fadd float %0, 1.000000e+00
-  %incdec.ptr1 = getelementptr inbounds float, float* %dst, i64 1
-  store float %add, float* %dst, align 4
-  %incdec.ptr2 = getelementptr inbounds float, float* %src, i64 2
-  %1 = load float, float* %incdec.ptr, align 4
+  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
+  store float %add, ptr %dst, align 4
+  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
+  %1 = load float, ptr %incdec.ptr, align 4
   %add3 = fadd float %1, 1.000000e+00
-  %incdec.ptr4 = getelementptr inbounds float, float* %dst, i64 2
-  store float %add3, float* %incdec.ptr1, align 4
-  %incdec.ptr5 = getelementptr inbounds float, float* %src, i64 3
-  %2 = load float, float* %incdec.ptr2, align 4
+  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
+  store float %add3, ptr %incdec.ptr1, align 4
+  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
+  %2 = load float, ptr %incdec.ptr2, align 4
   %add6 = fadd float %2, 2.000000e+00
-  %incdec.ptr7 = getelementptr inbounds float, float* %dst, i64 3
-  store float %add6, float* %incdec.ptr4, align 4
-  %3 = load float, float* %incdec.ptr5, align 4
+  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
+  store float %add6, ptr %incdec.ptr4, align 4
+  %3 = load float, ptr %incdec.ptr5, align 4
   %add9 = fadd float %3, 3.000000e+00
-  store float %add9, float* %incdec.ptr7, align 4
+  store float %add9, ptr %incdec.ptr7, align 4
   ret void
 }
 
-define void @add1fn(float* noalias %dst, float* noalias %src) {
+define void @add1fn(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @add1fn(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[SRC]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, float* [[DST:%.*]], i64 1
-; CHECK-NEXT:    store float [[TMP0]], float* [[DST]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds float, float* [[SRC]], i64 3
-; CHECK-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[INCDEC_PTR]] to <2 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[SRC]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 1
+; CHECK-NEXT:    store float [[TMP0]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3
+; CHECK-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[INCDEC_PTR]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x float> [[TMP2]], <float 1.000000e+00, float 2.000000e+00>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[INCDEC_PTR1]] to <2 x float>*
-; CHECK-NEXT:    store <2 x float> [[TMP3]], <2 x float>* [[TMP4]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = load float, float* [[INCDEC_PTR5]], align 4
+; CHECK-NEXT:    store <2 x float> [[TMP3]], ptr [[INCDEC_PTR1]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[INCDEC_PTR5]], align 4
 ; CHECK-NEXT:    [[ADD9:%.*]] = fadd float [[TMP5]], 3.000000e+00
-; CHECK-NEXT:    store float [[ADD9]], float* [[INCDEC_PTR7]], align 4
+; CHECK-NEXT:    store float [[ADD9]], ptr [[INCDEC_PTR7]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %incdec.ptr = getelementptr inbounds float, float* %src, i64 1
-  %0 = load float, float* %src, align 4
-  %incdec.ptr1 = getelementptr inbounds float, float* %dst, i64 1
-  store float %0, float* %dst, align 4
-  %incdec.ptr2 = getelementptr inbounds float, float* %src, i64 2
-  %1 = load float, float* %incdec.ptr, align 4
+  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
+  %0 = load float, ptr %src, align 4
+  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
+  store float %0, ptr %dst, align 4
+  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
+  %1 = load float, ptr %incdec.ptr, align 4
   %add3 = fadd float %1, 1.000000e+00
-  %incdec.ptr4 = getelementptr inbounds float, float* %dst, i64 2
-  store float %add3, float* %incdec.ptr1, align 4
-  %incdec.ptr5 = getelementptr inbounds float, float* %src, i64 3
-  %2 = load float, float* %incdec.ptr2, align 4
+  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
+  store float %add3, ptr %incdec.ptr1, align 4
+  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
+  %2 = load float, ptr %incdec.ptr2, align 4
   %add6 = fadd float %2, 2.000000e+00
-  %incdec.ptr7 = getelementptr inbounds float, float* %dst, i64 3
-  store float %add6, float* %incdec.ptr4, align 4
-  %3 = load float, float* %incdec.ptr5, align 4
+  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
+  store float %add6, ptr %incdec.ptr4, align 4
+  %3 = load float, ptr %incdec.ptr5, align 4
   %add9 = fadd float %3, 3.000000e+00
-  store float %add9, float* %incdec.ptr7, align 4
+  store float %add9, ptr %incdec.ptr7, align 4
   ret void
 }
 
-define void @sub0fn(float* noalias %dst, float* noalias %src) {
+define void @sub0fn(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @sub0fn(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[SRC]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[SRC]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP0]], -1.000000e+00
-; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, float* [[DST:%.*]], i64 1
-; CHECK-NEXT:    store float [[ADD]], float* [[DST]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, float* [[SRC]], i64 2
-; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[INCDEC_PTR]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, float* [[DST]], i64 2
-; CHECK-NEXT:    store float [[TMP1]], float* [[INCDEC_PTR1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[INCDEC_PTR2]] to <2 x float>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 1
+; CHECK-NEXT:    store float [[ADD]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[INCDEC_PTR]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 2
+; CHECK-NEXT:    store float [[TMP1]], ptr [[INCDEC_PTR1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, ptr [[INCDEC_PTR2]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x float> [[TMP3]], <float -2.000000e+00, float -3.000000e+00>
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast float* [[INCDEC_PTR4]] to <2 x float>*
-; CHECK-NEXT:    store <2 x float> [[TMP4]], <2 x float>* [[TMP5]], align 4
+; CHECK-NEXT:    store <2 x float> [[TMP4]], ptr [[INCDEC_PTR4]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %incdec.ptr = getelementptr inbounds float, float* %src, i64 1
-  %0 = load float, float* %src, align 4
+  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
+  %0 = load float, ptr %src, align 4
   %add = fadd fast float %0, -1.000000e+00
-  %incdec.ptr1 = getelementptr inbounds float, float* %dst, i64 1
-  store float %add, float* %dst, align 4
-  %incdec.ptr2 = getelementptr inbounds float, float* %src, i64 2
-  %1 = load float, float* %incdec.ptr, align 4
-  %incdec.ptr4 = getelementptr inbounds float, float* %dst, i64 2
-  store float %1, float* %incdec.ptr1, align 4
-  %incdec.ptr5 = getelementptr inbounds float, float* %src, i64 3
-  %2 = load float, float* %incdec.ptr2, align 4
+  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
+  store float %add, ptr %dst, align 4
+  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
+  %1 = load float, ptr %incdec.ptr, align 4
+  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
+  store float %1, ptr %incdec.ptr1, align 4
+  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
+  %2 = load float, ptr %incdec.ptr2, align 4
   %add6 = fadd float %2, -2.000000e+00
-  %incdec.ptr7 = getelementptr inbounds float, float* %dst, i64 3
-  store float %add6, float* %incdec.ptr4, align 4
-  %3 = load float, float* %incdec.ptr5, align 4
+  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
+  store float %add6, ptr %incdec.ptr4, align 4
+  %3 = load float, ptr %incdec.ptr5, align 4
   %add9 = fadd float %3, -3.000000e+00
-  store float %add9, float* %incdec.ptr7, align 4
+  store float %add9, ptr %incdec.ptr7, align 4
   ret void
 }
 
-define void @sub1fn(float* noalias %dst, float* noalias %src) {
+define void @sub1fn(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @sub1fn(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[SRC:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[SRC:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], <float 4.000000e+00, float -1.000000e+00, float -2.000000e+00, float -3.000000e+00>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[DST:%.*]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[DST:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %incdec.ptr = getelementptr inbounds float, float* %src, i64 1
-  %0 = load float, float* %src, align 4
+  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
+  %0 = load float, ptr %src, align 4
   %add = fadd float %0, 4.000000e+00
-  %incdec.ptr1 = getelementptr inbounds float, float* %dst, i64 1
-  store float %add, float* %dst, align 4
-  %incdec.ptr2 = getelementptr inbounds float, float* %src, i64 2
-  %1 = load float, float* %incdec.ptr, align 4
+  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
+  store float %add, ptr %dst, align 4
+  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
+  %1 = load float, ptr %incdec.ptr, align 4
   %sub = fadd float %1, -1.000000e+00
-  %incdec.ptr3 = getelementptr inbounds float, float* %dst, i64 2
-  store float %sub, float* %incdec.ptr1, align 4
-  %incdec.ptr4 = getelementptr inbounds float, float* %src, i64 3
-  %2 = load float, float* %incdec.ptr2, align 4
+  %incdec.ptr3 = getelementptr inbounds float, ptr %dst, i64 2
+  store float %sub, ptr %incdec.ptr1, align 4
+  %incdec.ptr4 = getelementptr inbounds float, ptr %src, i64 3
+  %2 = load float, ptr %incdec.ptr2, align 4
   %sub5 = fadd float %2, -2.000000e+00
-  %incdec.ptr6 = getelementptr inbounds float, float* %dst, i64 3
-  store float %sub5, float* %incdec.ptr3, align 4
-  %3 = load float, float* %incdec.ptr4, align 4
+  %incdec.ptr6 = getelementptr inbounds float, ptr %dst, i64 3
+  store float %sub5, ptr %incdec.ptr3, align 4
+  %3 = load float, ptr %incdec.ptr4, align 4
   %sub8 = fadd float %3, -3.000000e+00
-  store float %sub8, float* %incdec.ptr6, align 4
+  store float %sub8, ptr %incdec.ptr6, align 4
   ret void
 }
 
-define void @sub2fn(float* noalias %dst, float* noalias %src) {
+define void @sub2fn(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @sub2fn(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[SRC:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[SRC:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], <float -1.000000e+00, float -1.000000e+00, float -2.000000e+00, float -3.000000e+00>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[DST:%.*]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[DST:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %incdec.ptr = getelementptr inbounds float, float* %src, i64 1
-  %0 = load float, float* %src, align 4
+  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
+  %0 = load float, ptr %src, align 4
   %sub = fadd float %0, -1.000000e+00
-  %incdec.ptr1 = getelementptr inbounds float, float* %dst, i64 1
-  store float %sub, float* %dst, align 4
-  %incdec.ptr2 = getelementptr inbounds float, float* %src, i64 2
-  %1 = load float, float* %incdec.ptr, align 4
+  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
+  store float %sub, ptr %dst, align 4
+  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
+  %1 = load float, ptr %incdec.ptr, align 4
   %sub3 = fadd float %1, -1.000000e+00
-  %incdec.ptr4 = getelementptr inbounds float, float* %dst, i64 2
-  store float %sub3, float* %incdec.ptr1, align 4
-  %incdec.ptr5 = getelementptr inbounds float, float* %src, i64 3
-  %2 = load float, float* %incdec.ptr2, align 4
+  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
+  store float %sub3, ptr %incdec.ptr1, align 4
+  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
+  %2 = load float, ptr %incdec.ptr2, align 4
   %sub6 = fadd float %2, -2.000000e+00
-  %incdec.ptr7 = getelementptr inbounds float, float* %dst, i64 3
-  store float %sub6, float* %incdec.ptr4, align 4
-  %3 = load float, float* %incdec.ptr5, align 4
+  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
+  store float %sub6, ptr %incdec.ptr4, align 4
+  %3 = load float, ptr %incdec.ptr5, align 4
   %sub9 = fadd float %3, -3.000000e+00
-  store float %sub9, float* %incdec.ptr7, align 4
+  store float %sub9, ptr %incdec.ptr7, align 4
   ret void
 }
 
-define void @mulfn(float* noalias %dst, float* noalias %src) {
+define void @mulfn(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-LABEL: @mulfn(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 2
-; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, float* [[DST:%.*]], i64 2
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[SRC]] to <2 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 2
+; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[SRC]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], <float 2.570000e+02, float -3.000000e+00>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[DST]] to <2 x float>*
-; CHECK-NEXT:    store <2 x float> [[TMP2]], <2 x float>* [[TMP3]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds float, float* [[SRC]], i64 3
-; CHECK-NEXT:    [[TMP4:%.*]] = load float, float* [[INCDEC_PTR2]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3
-; CHECK-NEXT:    store float [[TMP4]], float* [[INCDEC_PTR4]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = load float, float* [[INCDEC_PTR5]], align 4
+; CHECK-NEXT:    store <2 x float> [[TMP2]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR5:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3
+; CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[INCDEC_PTR2]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3
+; CHECK-NEXT:    store float [[TMP4]], ptr [[INCDEC_PTR4]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[INCDEC_PTR5]], align 4
 ; CHECK-NEXT:    [[SUB9:%.*]] = fmul fast float [[TMP5]], -9.000000e+00
-; CHECK-NEXT:    store float [[SUB9]], float* [[INCDEC_PTR7]], align 4
+; CHECK-NEXT:    store float [[SUB9]], ptr [[INCDEC_PTR7]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %incdec.ptr = getelementptr inbounds float, float* %src, i64 1
-  %0 = load float, float* %src, align 4
+  %incdec.ptr = getelementptr inbounds float, ptr %src, i64 1
+  %0 = load float, ptr %src, align 4
   %sub = fmul float %0, 2.570000e+02
-  %incdec.ptr1 = getelementptr inbounds float, float* %dst, i64 1
-  store float %sub, float* %dst, align 4
-  %incdec.ptr2 = getelementptr inbounds float, float* %src, i64 2
-  %1 = load float, float* %incdec.ptr, align 4
+  %incdec.ptr1 = getelementptr inbounds float, ptr %dst, i64 1
+  store float %sub, ptr %dst, align 4
+  %incdec.ptr2 = getelementptr inbounds float, ptr %src, i64 2
+  %1 = load float, ptr %incdec.ptr, align 4
   %sub3 = fmul float %1, -3.000000e+00
-  %incdec.ptr4 = getelementptr inbounds float, float* %dst, i64 2
-  store float %sub3, float* %incdec.ptr1, align 4
-  %incdec.ptr5 = getelementptr inbounds float, float* %src, i64 3
-  %2 = load float, float* %incdec.ptr2, align 4
-  %incdec.ptr7 = getelementptr inbounds float, float* %dst, i64 3
-  store float %2, float* %incdec.ptr4, align 4
-  %3 = load float, float* %incdec.ptr5, align 4
+  %incdec.ptr4 = getelementptr inbounds float, ptr %dst, i64 2
+  store float %sub3, ptr %incdec.ptr1, align 4
+  %incdec.ptr5 = getelementptr inbounds float, ptr %src, i64 3
+  %2 = load float, ptr %incdec.ptr2, align 4
+  %incdec.ptr7 = getelementptr inbounds float, ptr %dst, i64 3
+  store float %2, ptr %incdec.ptr4, align 4
+  %3 = load float, ptr %incdec.ptr5, align 4
   %sub9 = fmul fast float %3, -9.000000e+00
-  store float %sub9, float* %incdec.ptr7, align 4
+  store float %sub9, ptr %incdec.ptr7, align 4
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll b/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll
index a2b526ebb362d..b8c551c7b771d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll
@@ -8,21 +8,21 @@ target triple = "x86_64-unknown-linux-gnu"
 ; The GEP has scalar and vector parameters and returns vector of pointers.
 
 ; Function Attrs: noreturn readonly uwtable
-define void @_Z3fn1v(i32 %x, <16 x i32*>%y) local_unnamed_addr #0 {
+define void @_Z3fn1v(i32 %x, <16 x ptr>%y) local_unnamed_addr #0 {
 ; CHECK-LABEL: @_Z3fn1v(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CONV42_LE:%.*]] = sext i32 [[X:%.*]] to i64
 ; CHECK-NEXT:    [[CONV36109_LE:%.*]] = zext i32 2 to i64
-; CHECK-NEXT:    [[VECTORGEP:%.*]] = getelementptr i32, <16 x i32*> [[Y:%.*]], i64 [[CONV36109_LE]]
-; CHECK-NEXT:    [[VECTORGEP208:%.*]] = getelementptr i32, <16 x i32*> [[Y]], i64 [[CONV42_LE]]
+; CHECK-NEXT:    [[VECTORGEP:%.*]] = getelementptr i32, <16 x ptr> [[Y:%.*]], i64 [[CONV36109_LE]]
+; CHECK-NEXT:    [[VECTORGEP208:%.*]] = getelementptr i32, <16 x ptr> [[Y]], i64 [[CONV42_LE]]
 ; CHECK-NEXT:    unreachable
 ;
 
 entry:
   %conv42.le = sext i32 %x to i64
   %conv36109.le = zext i32 2 to i64
-  %VectorGep = getelementptr i32, <16 x i32*> %y, i64 %conv36109.le
-  %VectorGep208 = getelementptr i32, <16 x i32*> %y, i64 %conv42.le
+  %VectorGep = getelementptr i32, <16 x ptr> %y, i64 %conv36109.le
+  %VectorGep208 = getelementptr i32, <16 x ptr> %y, i64 %conv42.le
   unreachable
 }
 

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-cmps.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-cmps.ll
index f5908eaabd3e2..b2e0af3b58ce5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-cmps.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-cmps.ll
@@ -1,12 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown %s | FileCheck %s
 
-define i32 @test(float* %isec, float %0) {
+define i32 @test(ptr %isec, float %0) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[ISEC:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[ARRAYIDX5]] to <2 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[ISEC:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> <float 0.000000e+00, float poison>, float [[TMP0:%.*]], i32 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul fast <2 x float> [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[CMP61:%.*]] = fcmp fast oge float 0.000000e+00, 0.000000e+00
@@ -20,10 +18,9 @@ define i32 @test(float* %isec, float %0) {
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:
-  %arrayidx5 = getelementptr inbounds float, float* %isec, i64 0
-  %1 = load float, float* %arrayidx5, align 4
-  %arrayidx10 = getelementptr inbounds float, float* %isec, i64 1
-  %2 = load float, float* %arrayidx10, align 4
+  %1 = load float, ptr %isec, align 4
+  %arrayidx10 = getelementptr inbounds float, ptr %isec, i64 1
+  %2 = load float, ptr %arrayidx10, align 4
   %mul16 = fmul fast float %0, %2
   %mul55 = fmul fast float 0.000000e+00, %1
   %cmp61 = fcmp fast oge float 0.000000e+00, 0.000000e+00

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-alt-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-alt-shuffle.ll
index 80dd1492013aa..69cdbbdbd098b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-alt-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-alt-shuffle.ll
@@ -1,13 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
 
-define void @foo(i8* %c, float* %d) {
+define void @foo(ptr %c, ptr %d) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[C:%.*]], i64 1
-; CHECK-NEXT:    [[ADD_PTR53:%.*]] = getelementptr inbounds float, float* [[D:%.*]], i64 -4
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[ARRAYIDX4]] to <4 x i8>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 1
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[C:%.*]], i64 1
+; CHECK-NEXT:    [[ADD_PTR53:%.*]] = getelementptr inbounds float, ptr [[D:%.*]], i64 -4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[ARRAYIDX4]], align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw <4 x i32> [[TMP2]], <i32 2, i32 2, i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP4:%.*]] = and <4 x i32> [[TMP2]], <i32 2, i32 2, i32 2, i32 3>
@@ -15,46 +14,45 @@ define void @foo(i8* %c, float* %d) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> undef, [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sitofp <4 x i32> [[TMP6]] to <4 x float>
 ; CHECK-NEXT:    [[TMP8:%.*]] = fdiv <4 x float> [[TMP7]], undef
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast float* [[ADD_PTR53]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP8]], <4 x float>* [[TMP9]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP8]], ptr [[ADD_PTR53]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %arrayidx1 = getelementptr inbounds i8, i8* %c, i64 4
-  %0 = load i8, i8* %arrayidx1, align 1
+  %arrayidx1 = getelementptr inbounds i8, ptr %c, i64 4
+  %0 = load i8, ptr %arrayidx1, align 1
   %conv2 = zext i8 %0 to i32
   %and = and i32 %conv2, 3
-  %arrayidx4 = getelementptr inbounds i8, i8* %c, i64 1
-  %1 = load i8, i8* %arrayidx4, align 1
+  %arrayidx4 = getelementptr inbounds i8, ptr %c, i64 1
+  %1 = load i8, ptr %arrayidx4, align 1
   %conv5 = zext i8 %1 to i32
   %shl6 = shl nuw nsw i32 %conv5, 2
-  %arrayidx12 = getelementptr inbounds i8, i8* %c, i64 2
-  %2 = load i8, i8* %arrayidx12, align 1
+  %arrayidx12 = getelementptr inbounds i8, ptr %c, i64 2
+  %2 = load i8, ptr %arrayidx12, align 1
   %conv13 = zext i8 %2 to i32
   %shl14 = shl nuw nsw i32 %conv13, 2
-  %arrayidx17 = getelementptr inbounds i8, i8* %c, i64 3
-  %3 = load i8, i8* %arrayidx17, align 1
+  %arrayidx17 = getelementptr inbounds i8, ptr %c, i64 3
+  %3 = load i8, ptr %arrayidx17, align 1
   %conv18 = zext i8 %3 to i32
   %shl19 = shl nuw nsw i32 %conv18, 2
   %sub = add nsw i32 undef, %shl6
   %conv27 = sitofp i32 %sub to float
   %div = fdiv float %conv27, undef
-  %add.ptr = getelementptr inbounds float, float* %d, i64 -1
-  store float %div, float* %add.ptr, align 4
+  %add.ptr = getelementptr inbounds float, ptr %d, i64 -1
+  store float %div, ptr %add.ptr, align 4
   %sub32 = add nsw i32 undef, %and
   %conv33 = sitofp i32 %sub32 to float
   %div36 = fdiv float %conv33, undef
-  %add.ptr37 = getelementptr inbounds float, float* %d, i64 -2
-  store float %div36, float* %add.ptr37, align 4
+  %add.ptr37 = getelementptr inbounds float, ptr %d, i64 -2
+  store float %div36, ptr %add.ptr37, align 4
   %sub40 = add nsw i32 undef, %shl19
   %conv41 = sitofp i32 %sub40 to float
   %div44 = fdiv float %conv41, undef
-  %add.ptr45 = getelementptr inbounds float, float* %d, i64 -3
-  store float %div44, float* %add.ptr45, align 4
+  %add.ptr45 = getelementptr inbounds float, ptr %d, i64 -3
+  store float %div44, ptr %add.ptr45, align 4
   %sub48 = add nsw i32 undef, %shl14
   %conv49 = sitofp i32 %sub48 to float
   %div52 = fdiv float %conv49, undef
-  %add.ptr53 = getelementptr inbounds float, float* %d, i64 -4
-  store float %div52, float* %add.ptr53, align 4
+  %add.ptr53 = getelementptr inbounds float, ptr %d, i64 -4
+  store float %div52, ptr %add.ptr53, align 4
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
index 6dd5dc1b35af4..126b3c31ec596 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
@@ -1,11 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 < %s | FileCheck %s
 
-define i32 @foo(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) {
+define i32 @foo(ptr nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[ARR:%.*]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[ARR:%.*]], align 4
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0>
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A2:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A1:%.*]], i32 1
@@ -20,15 +19,15 @@ define i32 @foo(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4
 ; CHECK-NEXT:    ret i32 [[TMP11]]
 ;
 entry:
-  %arrayidx = getelementptr inbounds i32, i32* %arr, i64 1
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %arr, i64 1
+  %0 = load i32, ptr %arrayidx, align 4
   %add = add i32 %0, %a1
   %add2 = add i32 %0, %a2
   %add4 = add i32 %0, %a3
   %add6 = add i32 %0, %a4
   %add8 = add i32 %0, %a5
   %add10 = add i32 %0, %a6
-  %1 = load i32, i32* %arr, align 4
+  %1 = load i32, ptr %arr, align 4
   %add12 = add i32 %1, %a7
   %add14 = add i32 %1, %a8
   %cmp = icmp ult i32 %add, %add2
@@ -48,11 +47,10 @@ entry:
   ret i32 %cond44
 }
 
-define i32 @foo1(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) {
+define i32 @foo1(ptr nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) {
 ; CHECK-LABEL: @foo1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[ARR:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[ARR:%.*]], align 4
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 2, i32 1, i32 3, i32 1, i32 1, i32 0, i32 2, i32 1>
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A2:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A1:%.*]], i32 1
@@ -67,18 +65,18 @@ define i32 @foo1(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a
 ; CHECK-NEXT:    ret i32 [[TMP11]]
 ;
 entry:
-  %arrayidx = getelementptr inbounds i32, i32* %arr, i64 1
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %arr, i64 1
+  %0 = load i32, ptr %arrayidx, align 4
   %add = add i32 %0, %a1
-  %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 2
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %arr, i64 2
+  %1 = load i32, ptr %arrayidx1, align 4
   %add2 = add i32 %1, %a2
-  %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 3
-  %2 = load i32, i32* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %arr, i64 3
+  %2 = load i32, ptr %arrayidx3, align 4
   %add4 = add i32 %2, %a3
   %add6 = add i32 %0, %a4
   %add8 = add i32 %0, %a5
-  %3 = load i32, i32* %arr, align 4
+  %3 = load i32, ptr %arr, align 4
   %add10 = add i32 %3, %a6
   %add12 = add i32 %1, %a7
   %add14 = add i32 %0, %a8
@@ -99,11 +97,10 @@ entry:
   ret i32 %cond44
 }
 
-define i32 @foo2(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) {
+define i32 @foo2(ptr nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) {
 ; CHECK-LABEL: @foo2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[ARR:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[ARR:%.*]], align 4
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 2, i32 3, i32 3, i32 0, i32 1, i32 0, i32 2, i32 1>
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A2:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A1:%.*]], i32 1
@@ -118,17 +115,17 @@ define i32 @foo2(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a
 ; CHECK-NEXT:    ret i32 [[TMP11]]
 ;
 entry:
-  %arrayidx = getelementptr inbounds i32, i32* %arr, i64 3
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %arr, i64 3
+  %0 = load i32, ptr %arrayidx, align 4
   %add = add i32 %0, %a1
-  %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 2
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %arr, i64 2
+  %1 = load i32, ptr %arrayidx1, align 4
   %add2 = add i32 %1, %a2
   %add4 = add i32 %0, %a3
-  %2 = load i32, i32* %arr, align 4
+  %2 = load i32, ptr %arr, align 4
   %add6 = add i32 %2, %a4
-  %arrayidx7 = getelementptr inbounds i32, i32* %arr, i64 1
-  %3 = load i32, i32* %arrayidx7, align 4
+  %arrayidx7 = getelementptr inbounds i32, ptr %arr, i64 1
+  %3 = load i32, ptr %arrayidx7, align 4
   %add8 = add i32 %3, %a5
   %add10 = add i32 %2, %a6
   %add12 = add i32 %1, %a7

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reordered-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reordered-list.ll
index 18a97c0848831..4c3c712419a2e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reordered-list.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reordered-list.ll
@@ -1,34 +1,29 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown %s | FileCheck %s
 
-define void @test(double* %isec) {
+define void @test(ptr %isec) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[ISEC:%.*]], i64 0
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[ISEC]], i64 2
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX10]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[ARRAYIDX2]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[ISEC:%.*]], i64 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[ISEC]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = fsub <2 x double> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x i32> <i32 1, i32 2>
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast double* [[ARRAYIDX10]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP6]], ptr [[ISEC]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %arrayidx5 = getelementptr inbounds double, double* %isec, i64 1
-  %0 = load double, double* %arrayidx5, align 8
-  %arrayidx10 = getelementptr inbounds double, double* %isec, i64 0
-  %1 = load double, double* %arrayidx10, align 8
-  %arrayidx3 = getelementptr inbounds double, double* %isec, i64 3
-  %2 = load double, double* %arrayidx3, align 8
-  %arrayidx2 = getelementptr inbounds double, double* %isec, i64 2
-  %3 = load double, double* %arrayidx2, align 8
+  %arrayidx5 = getelementptr inbounds double, ptr %isec, i64 1
+  %0 = load double, ptr %arrayidx5, align 8
+  %1 = load double, ptr %isec, align 8
+  %arrayidx3 = getelementptr inbounds double, ptr %isec, i64 3
+  %2 = load double, ptr %arrayidx3, align 8
+  %arrayidx2 = getelementptr inbounds double, ptr %isec, i64 2
+  %3 = load double, ptr %arrayidx2, align 8
   %4 = fadd double %0, %2
   %5 = fsub double %1, %3
-  store double %4, double *%arrayidx10
-  store double %5, double *%arrayidx5
+  store double %4, ptr %isec
+  store double %5, ptr %arrayidx5
   ret void
 }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll
index 9f5b8ca0e8d1e..666a2ec85c0fa 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll
@@ -13,7 +13,7 @@ define void @foo() {
 ; CHECK-NEXT:    br label [[BB2:%.*]]
 ; CHECK:       bb2:
 ; CHECK-NEXT:    [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP14:%.*]], [[BB3:%.*]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = load double, double* undef, align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load double, ptr undef, align 8
 ; CHECK-NEXT:    br i1 undef, label [[BB3]], label [[BB4:%.*]]
 ; CHECK:       bb4:
 ; CHECK-NEXT:    [[TMP4:%.*]] = fpext <4 x float> [[TMP2]] to <4 x double>
@@ -45,7 +45,7 @@ bb2:
   %1 = phi float [ %conv, %bb1 ], [ %10, %bb3 ]
   %2 = phi float [ undef, %bb1 ], [ %11, %bb3 ]
   %3 = phi float [ undef, %bb1 ], [ %12, %bb3 ]
-  %4 = load double, double* undef, align 8
+  %4 = load double, ptr undef, align 8
   br i1 undef, label %bb3, label %bb4
 
 bb4:

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll
index 968fbf42d2142..d1f6c41e5c30e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll
@@ -10,11 +10,11 @@
 ; vXi8
 ;
 
-define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) {
+define <2 x i64> @loadext_2i8_to_2i64(ptr %p0) {
 ; SSE2-LABEL: @loadext_2i8_to_2i64(
-; SSE2-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
-; SSE2-NEXT:    [[I0:%.*]] = load i8, i8* [[P0]], align 1
-; SSE2-NEXT:    [[I1:%.*]] = load i8, i8* [[P1]], align 1
+; SSE2-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P0:%.*]], i64 1
+; SSE2-NEXT:    [[I0:%.*]] = load i8, ptr [[P0]], align 1
+; SSE2-NEXT:    [[I1:%.*]] = load i8, ptr [[P1]], align 1
 ; SSE2-NEXT:    [[X0:%.*]] = zext i8 [[I0]] to i64
 ; SSE2-NEXT:    [[X1:%.*]] = zext i8 [[I1]] to i64
 ; SSE2-NEXT:    [[V0:%.*]] = insertelement <2 x i64> poison, i64 [[X0]], i32 0
@@ -22,20 +22,18 @@ define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) {
 ; SSE2-NEXT:    ret <2 x i64> [[V1]]
 ;
 ; SLM-LABEL: @loadext_2i8_to_2i64(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <2 x i8>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>
 ; SLM-NEXT:    ret <2 x i64> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_2i8_to_2i64(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <2 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>
 ; AVX-NEXT:    ret <2 x i64> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
-  %i0 = load i8, i8* %p0, align 1
-  %i1 = load i8, i8* %p1, align 1
+  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
+  %i0 = load i8, ptr %p0, align 1
+  %i1 = load i8, ptr %p1, align 1
   %x0 = zext i8 %i0 to i64
   %x1 = zext i8 %i1 to i64
   %v0 = insertelement <2 x i64> poison, i64 %x0, i32 0
@@ -43,32 +41,29 @@ define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) {
   ret <2 x i64> %v1
 }
 
-define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) {
+define <4 x i32> @loadext_4i8_to_4i32(ptr %p0) {
 ; SSE2-LABEL: @loadext_4i8_to_4i32(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
 ; SSE2-NEXT:    ret <4 x i32> [[TMP3]]
 ;
 ; SLM-LABEL: @loadext_4i8_to_4i32(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
 ; SLM-NEXT:    ret <4 x i32> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_4i8_to_4i32(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
 ; AVX-NEXT:    ret <4 x i32> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
-  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
-  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
-  %i0 = load i8, i8* %p0, align 1
-  %i1 = load i8, i8* %p1, align 1
-  %i2 = load i8, i8* %p2, align 1
-  %i3 = load i8, i8* %p3, align 1
+  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
+  %i0 = load i8, ptr %p0, align 1
+  %i1 = load i8, ptr %p1, align 1
+  %i2 = load i8, ptr %p2, align 1
+  %i3 = load i8, ptr %p3, align 1
   %x0 = zext i8 %i0 to i32
   %x1 = zext i8 %i1 to i32
   %x2 = zext i8 %i2 to i32
@@ -80,32 +75,29 @@ define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) {
   ret <4 x i32> %v3
 }
 
-define <4 x i64> @loadext_4i8_to_4i64(i8* %p0) {
+define <4 x i64> @loadext_4i8_to_4i64(ptr %p0) {
 ; SSE2-LABEL: @loadext_4i8_to_4i64(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
 ; SSE2-NEXT:    ret <4 x i64> [[TMP3]]
 ;
 ; SLM-LABEL: @loadext_4i8_to_4i64(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
 ; SLM-NEXT:    ret <4 x i64> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_4i8_to_4i64(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
 ; AVX-NEXT:    ret <4 x i64> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
-  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
-  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
-  %i0 = load i8, i8* %p0, align 1
-  %i1 = load i8, i8* %p1, align 1
-  %i2 = load i8, i8* %p2, align 1
-  %i3 = load i8, i8* %p3, align 1
+  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
+  %i0 = load i8, ptr %p0, align 1
+  %i1 = load i8, ptr %p1, align 1
+  %i2 = load i8, ptr %p2, align 1
+  %i3 = load i8, ptr %p3, align 1
   %x0 = zext i8 %i0 to i64
   %x1 = zext i8 %i1 to i64
   %x2 = zext i8 %i2 to i64
@@ -117,40 +109,37 @@ define <4 x i64> @loadext_4i8_to_4i64(i8* %p0) {
   ret <4 x i64> %v3
 }
 
-define <8 x i16> @loadext_8i8_to_8i16(i8* %p0) {
+define <8 x i16> @loadext_8i8_to_8i16(ptr %p0) {
 ; SSE2-LABEL: @loadext_8i8_to_8i16(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; SSE2-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
 ; SSE2-NEXT:    ret <8 x i16> [[TMP3]]
 ;
 ; SLM-LABEL: @loadext_8i8_to_8i16(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
 ; SLM-NEXT:    ret <8 x i16> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_8i8_to_8i16(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
 ; AVX-NEXT:    ret <8 x i16> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
-  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
-  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
-  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
-  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
-  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
-  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
-  %i0 = load i8, i8* %p0, align 1
-  %i1 = load i8, i8* %p1, align 1
-  %i2 = load i8, i8* %p2, align 1
-  %i3 = load i8, i8* %p3, align 1
-  %i4 = load i8, i8* %p4, align 1
-  %i5 = load i8, i8* %p5, align 1
-  %i6 = load i8, i8* %p6, align 1
-  %i7 = load i8, i8* %p7, align 1
+  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
+  %p4 = getelementptr inbounds i8, ptr %p0, i64 4
+  %p5 = getelementptr inbounds i8, ptr %p0, i64 5
+  %p6 = getelementptr inbounds i8, ptr %p0, i64 6
+  %p7 = getelementptr inbounds i8, ptr %p0, i64 7
+  %i0 = load i8, ptr %p0, align 1
+  %i1 = load i8, ptr %p1, align 1
+  %i2 = load i8, ptr %p2, align 1
+  %i3 = load i8, ptr %p3, align 1
+  %i4 = load i8, ptr %p4, align 1
+  %i5 = load i8, ptr %p5, align 1
+  %i6 = load i8, ptr %p6, align 1
+  %i7 = load i8, ptr %p7, align 1
   %x0 = zext i8 %i0 to i16
   %x1 = zext i8 %i1 to i16
   %x2 = zext i8 %i2 to i16
@@ -170,40 +159,37 @@ define <8 x i16> @loadext_8i8_to_8i16(i8* %p0) {
   ret <8 x i16> %v7
 }
 
-define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) {
+define <8 x i32> @loadext_8i8_to_8i32(ptr %p0) {
 ; SSE2-LABEL: @loadext_8i8_to_8i32(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; SSE2-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
 ; SSE2-NEXT:    ret <8 x i32> [[TMP3]]
 ;
 ; SLM-LABEL: @loadext_8i8_to_8i32(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
 ; SLM-NEXT:    ret <8 x i32> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_8i8_to_8i32(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
 ; AVX-NEXT:    ret <8 x i32> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
-  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
-  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
-  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
-  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
-  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
-  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
-  %i0 = load i8, i8* %p0, align 1
-  %i1 = load i8, i8* %p1, align 1
-  %i2 = load i8, i8* %p2, align 1
-  %i3 = load i8, i8* %p3, align 1
-  %i4 = load i8, i8* %p4, align 1
-  %i5 = load i8, i8* %p5, align 1
-  %i6 = load i8, i8* %p6, align 1
-  %i7 = load i8, i8* %p7, align 1
+  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
+  %p4 = getelementptr inbounds i8, ptr %p0, i64 4
+  %p5 = getelementptr inbounds i8, ptr %p0, i64 5
+  %p6 = getelementptr inbounds i8, ptr %p0, i64 6
+  %p7 = getelementptr inbounds i8, ptr %p0, i64 7
+  %i0 = load i8, ptr %p0, align 1
+  %i1 = load i8, ptr %p1, align 1
+  %i2 = load i8, ptr %p2, align 1
+  %i3 = load i8, ptr %p3, align 1
+  %i4 = load i8, ptr %p4, align 1
+  %i5 = load i8, ptr %p5, align 1
+  %i6 = load i8, ptr %p6, align 1
+  %i7 = load i8, ptr %p7, align 1
   %x0 = zext i8 %i0 to i32
   %x1 = zext i8 %i1 to i32
   %x2 = zext i8 %i2 to i32
@@ -223,56 +209,53 @@ define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) {
   ret <8 x i32> %v7
 }
 
-define <16 x i16> @loadext_16i8_to_16i16(i8* %p0) {
+define <16 x i16> @loadext_16i8_to_16i16(ptr %p0) {
 ; SSE2-LABEL: @loadext_16i8_to_16i16(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
+; SSE2-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
 ; SSE2-NEXT:    ret <16 x i16> [[TMP3]]
 ;
 ; SLM-LABEL: @loadext_16i8_to_16i16(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
 ; SLM-NEXT:    ret <16 x i16> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_16i8_to_16i16(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
 ; AVX-NEXT:    ret <16 x i16> [[TMP3]]
 ;
-  %p1  = getelementptr inbounds i8, i8* %p0, i64 1
-  %p2  = getelementptr inbounds i8, i8* %p0, i64 2
-  %p3  = getelementptr inbounds i8, i8* %p0, i64 3
-  %p4  = getelementptr inbounds i8, i8* %p0, i64 4
-  %p5  = getelementptr inbounds i8, i8* %p0, i64 5
-  %p6  = getelementptr inbounds i8, i8* %p0, i64 6
-  %p7  = getelementptr inbounds i8, i8* %p0, i64 7
-  %p8  = getelementptr inbounds i8, i8* %p0, i64 8
-  %p9  = getelementptr inbounds i8, i8* %p0, i64 9
-  %p10 = getelementptr inbounds i8, i8* %p0, i64 10
-  %p11 = getelementptr inbounds i8, i8* %p0, i64 11
-  %p12 = getelementptr inbounds i8, i8* %p0, i64 12
-  %p13 = getelementptr inbounds i8, i8* %p0, i64 13
-  %p14 = getelementptr inbounds i8, i8* %p0, i64 14
-  %p15 = getelementptr inbounds i8, i8* %p0, i64 15
-  %i0  = load i8, i8* %p0,  align 1
-  %i1  = load i8, i8* %p1,  align 1
-  %i2  = load i8, i8* %p2,  align 1
-  %i3  = load i8, i8* %p3,  align 1
-  %i4  = load i8, i8* %p4,  align 1
-  %i5  = load i8, i8* %p5,  align 1
-  %i6  = load i8, i8* %p6,  align 1
-  %i7  = load i8, i8* %p7,  align 1
-  %i8  = load i8, i8* %p8,  align 1
-  %i9  = load i8, i8* %p9,  align 1
-  %i10 = load i8, i8* %p10, align 1
-  %i11 = load i8, i8* %p11, align 1
-  %i12 = load i8, i8* %p12, align 1
-  %i13 = load i8, i8* %p13, align 1
-  %i14 = load i8, i8* %p14, align 1
-  %i15 = load i8, i8* %p15, align 1
+  %p1  = getelementptr inbounds i8, ptr %p0, i64 1
+  %p2  = getelementptr inbounds i8, ptr %p0, i64 2
+  %p3  = getelementptr inbounds i8, ptr %p0, i64 3
+  %p4  = getelementptr inbounds i8, ptr %p0, i64 4
+  %p5  = getelementptr inbounds i8, ptr %p0, i64 5
+  %p6  = getelementptr inbounds i8, ptr %p0, i64 6
+  %p7  = getelementptr inbounds i8, ptr %p0, i64 7
+  %p8  = getelementptr inbounds i8, ptr %p0, i64 8
+  %p9  = getelementptr inbounds i8, ptr %p0, i64 9
+  %p10 = getelementptr inbounds i8, ptr %p0, i64 10
+  %p11 = getelementptr inbounds i8, ptr %p0, i64 11
+  %p12 = getelementptr inbounds i8, ptr %p0, i64 12
+  %p13 = getelementptr inbounds i8, ptr %p0, i64 13
+  %p14 = getelementptr inbounds i8, ptr %p0, i64 14
+  %p15 = getelementptr inbounds i8, ptr %p0, i64 15
+  %i0  = load i8, ptr %p0,  align 1
+  %i1  = load i8, ptr %p1,  align 1
+  %i2  = load i8, ptr %p2,  align 1
+  %i3  = load i8, ptr %p3,  align 1
+  %i4  = load i8, ptr %p4,  align 1
+  %i5  = load i8, ptr %p5,  align 1
+  %i6  = load i8, ptr %p6,  align 1
+  %i7  = load i8, ptr %p7,  align 1
+  %i8  = load i8, ptr %p8,  align 1
+  %i9  = load i8, ptr %p9,  align 1
+  %i10 = load i8, ptr %p10, align 1
+  %i11 = load i8, ptr %p11, align 1
+  %i12 = load i8, ptr %p12, align 1
+  %i13 = load i8, ptr %p13, align 1
+  %i14 = load i8, ptr %p14, align 1
+  %i15 = load i8, ptr %p15, align 1
   %x0  = zext i8 %i0  to i16
   %x1  = zext i8 %i1  to i16
   %x2  = zext i8 %i2  to i16
@@ -312,28 +295,25 @@ define <16 x i16> @loadext_16i8_to_16i16(i8* %p0) {
 ; vXi16
 ;
 
-define <2 x i64> @loadext_2i16_to_2i64(i16* %p0) {
+define <2 x i64> @loadext_2i16_to_2i64(ptr %p0) {
 ; SSE2-LABEL: @loadext_2i16_to_2i64(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
+; SSE2-NEXT:    [[TMP2:%.*]] = load <2 x i16>, ptr [[P0:%.*]], align 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
 ; SSE2-NEXT:    ret <2 x i64> [[TMP3]]
 ;
 ; SLM-LABEL: @loadext_2i16_to_2i64(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i16>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
 ; SLM-NEXT:    ret <2 x i64> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_2i16_to_2i64(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i16>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
 ; AVX-NEXT:    ret <2 x i64> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
-  %i0 = load i16, i16* %p0, align 1
-  %i1 = load i16, i16* %p1, align 1
+  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
+  %i0 = load i16, ptr %p0, align 1
+  %i1 = load i16, ptr %p1, align 1
   %x0 = zext i16 %i0 to i64
   %x1 = zext i16 %i1 to i64
   %v0 = insertelement <2 x i64> poison, i64 %x0, i32 0
@@ -341,32 +321,29 @@ define <2 x i64> @loadext_2i16_to_2i64(i16* %p0) {
   ret <2 x i64> %v1
 }
 
-define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) {
+define <4 x i32> @loadext_4i16_to_4i32(ptr %p0) {
 ; SSE2-LABEL: @loadext_4i16_to_4i32(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
 ; SSE2-NEXT:    ret <4 x i32> [[TMP3]]
 ;
 ; SLM-LABEL: @loadext_4i16_to_4i32(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
 ; SLM-NEXT:    ret <4 x i32> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_4i16_to_4i32(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
 ; AVX-NEXT:    ret <4 x i32> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
-  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
-  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
-  %i0 = load i16, i16* %p0, align 1
-  %i1 = load i16, i16* %p1, align 1
-  %i2 = load i16, i16* %p2, align 1
-  %i3 = load i16, i16* %p3, align 1
+  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
+  %i0 = load i16, ptr %p0, align 1
+  %i1 = load i16, ptr %p1, align 1
+  %i2 = load i16, ptr %p2, align 1
+  %i3 = load i16, ptr %p3, align 1
   %x0 = zext i16 %i0 to i32
   %x1 = zext i16 %i1 to i32
   %x2 = zext i16 %i2 to i32
@@ -378,32 +355,29 @@ define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) {
   ret <4 x i32> %v3
 }
 
-define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) {
+define <4 x i64> @loadext_4i16_to_4i64(ptr %p0) {
 ; SSE2-LABEL: @loadext_4i16_to_4i64(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
 ; SSE2-NEXT:    ret <4 x i64> [[TMP3]]
 ;
 ; SLM-LABEL: @loadext_4i16_to_4i64(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
 ; SLM-NEXT:    ret <4 x i64> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_4i16_to_4i64(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
 ; AVX-NEXT:    ret <4 x i64> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
-  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
-  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
-  %i0 = load i16, i16* %p0, align 1
-  %i1 = load i16, i16* %p1, align 1
-  %i2 = load i16, i16* %p2, align 1
-  %i3 = load i16, i16* %p3, align 1
+  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
+  %i0 = load i16, ptr %p0, align 1
+  %i1 = load i16, ptr %p1, align 1
+  %i2 = load i16, ptr %p2, align 1
+  %i3 = load i16, ptr %p3, align 1
   %x0 = zext i16 %i0 to i64
   %x1 = zext i16 %i1 to i64
   %x2 = zext i16 %i2 to i64
@@ -415,40 +389,37 @@ define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) {
   ret <4 x i64> %v3
 }
 
-define <8 x i32> @loadext_8i16_to_8i32(i16* %p0) {
+define <8 x i32> @loadext_8i16_to_8i32(ptr %p0) {
 ; SSE2-LABEL: @loadext_8i16_to_8i32(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1
+; SSE2-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
 ; SSE2-NEXT:    ret <8 x i32> [[TMP3]]
 ;
 ; SLM-LABEL: @loadext_8i16_to_8i32(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
 ; SLM-NEXT:    ret <8 x i32> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_8i16_to_8i32(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
 ; AVX-NEXT:    ret <8 x i32> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
-  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
-  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
-  %p4 = getelementptr inbounds i16, i16* %p0, i64 4
-  %p5 = getelementptr inbounds i16, i16* %p0, i64 5
-  %p6 = getelementptr inbounds i16, i16* %p0, i64 6
-  %p7 = getelementptr inbounds i16, i16* %p0, i64 7
-  %i0 = load i16, i16* %p0, align 1
-  %i1 = load i16, i16* %p1, align 1
-  %i2 = load i16, i16* %p2, align 1
-  %i3 = load i16, i16* %p3, align 1
-  %i4 = load i16, i16* %p4, align 1
-  %i5 = load i16, i16* %p5, align 1
-  %i6 = load i16, i16* %p6, align 1
-  %i7 = load i16, i16* %p7, align 1
+  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
+  %p4 = getelementptr inbounds i16, ptr %p0, i64 4
+  %p5 = getelementptr inbounds i16, ptr %p0, i64 5
+  %p6 = getelementptr inbounds i16, ptr %p0, i64 6
+  %p7 = getelementptr inbounds i16, ptr %p0, i64 7
+  %i0 = load i16, ptr %p0, align 1
+  %i1 = load i16, ptr %p1, align 1
+  %i2 = load i16, ptr %p2, align 1
+  %i3 = load i16, ptr %p3, align 1
+  %i4 = load i16, ptr %p4, align 1
+  %i5 = load i16, ptr %p5, align 1
+  %i6 = load i16, ptr %p6, align 1
+  %i7 = load i16, ptr %p7, align 1
   %x0 = zext i16 %i0 to i32
   %x1 = zext i16 %i1 to i32
   %x2 = zext i16 %i2 to i32
@@ -472,28 +443,25 @@ define <8 x i32> @loadext_8i16_to_8i32(i16* %p0) {
 ; vXi32
 ;
 
-define <2 x i64> @loadext_2i32_to_2i64(i32* %p0) {
+define <2 x i64> @loadext_2i32_to_2i64(ptr %p0) {
 ; SSE2-LABEL: @loadext_2i32_to_2i64(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
+; SSE2-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[P0:%.*]], align 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
 ; SSE2-NEXT:    ret <2 x i64> [[TMP3]]
 ;
 ; SLM-LABEL: @loadext_2i32_to_2i64(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
 ; SLM-NEXT:    ret <2 x i64> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_2i32_to_2i64(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
 ; AVX-NEXT:    ret <2 x i64> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i32, i32* %p0, i64 1
-  %i0 = load i32, i32* %p0, align 1
-  %i1 = load i32, i32* %p1, align 1
+  %p1 = getelementptr inbounds i32, ptr %p0, i64 1
+  %i0 = load i32, ptr %p0, align 1
+  %i1 = load i32, ptr %p1, align 1
   %x0 = zext i32 %i0 to i64
   %x1 = zext i32 %i1 to i64
   %v0 = insertelement <2 x i64> poison, i64 %x0, i32 0
@@ -501,32 +469,29 @@ define <2 x i64> @loadext_2i32_to_2i64(i32* %p0) {
   ret <2 x i64> %v1
 }
 
-define <4 x i64> @loadext_4i32_to_4i64(i32* %p0) {
+define <4 x i64> @loadext_4i32_to_4i64(ptr %p0) {
 ; SSE2-LABEL: @loadext_4i32_to_4i64(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
+; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[P0:%.*]], align 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
 ; SSE2-NEXT:    ret <4 x i64> [[TMP3]]
 ;
 ; SLM-LABEL: @loadext_4i32_to_4i64(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
 ; SLM-NEXT:    ret <4 x i64> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_4i32_to_4i64(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
 ; AVX-NEXT:    ret <4 x i64> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i32, i32* %p0, i64 1
-  %p2 = getelementptr inbounds i32, i32* %p0, i64 2
-  %p3 = getelementptr inbounds i32, i32* %p0, i64 3
-  %i0 = load i32, i32* %p0, align 1
-  %i1 = load i32, i32* %p1, align 1
-  %i2 = load i32, i32* %p2, align 1
-  %i3 = load i32, i32* %p3, align 1
+  %p1 = getelementptr inbounds i32, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i32, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i32, ptr %p0, i64 3
+  %i0 = load i32, ptr %p0, align 1
+  %i1 = load i32, ptr %p1, align 1
+  %i2 = load i32, ptr %p2, align 1
+  %i3 = load i32, ptr %p3, align 1
   %x0 = zext i32 %i0 to i64
   %x1 = zext i32 %i1 to i64
   %x2 = zext i32 %i2 to i64

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/zext.ll b/llvm/test/Transforms/SLPVectorizer/X86/zext.ll
index 5882b32760dde..829e4bab20ffa 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/zext.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/zext.ll
@@ -10,11 +10,11 @@
 ; vXi8
 ;
 
-define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) {
+define <2 x i64> @loadext_2i8_to_2i64(ptr %p0) {
 ; SSE2-LABEL: @loadext_2i8_to_2i64(
-; SSE2-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
-; SSE2-NEXT:    [[I0:%.*]] = load i8, i8* [[P0]], align 1
-; SSE2-NEXT:    [[I1:%.*]] = load i8, i8* [[P1]], align 1
+; SSE2-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P0:%.*]], i64 1
+; SSE2-NEXT:    [[I0:%.*]] = load i8, ptr [[P0]], align 1
+; SSE2-NEXT:    [[I1:%.*]] = load i8, ptr [[P1]], align 1
 ; SSE2-NEXT:    [[X0:%.*]] = zext i8 [[I0]] to i64
 ; SSE2-NEXT:    [[X1:%.*]] = zext i8 [[I1]] to i64
 ; SSE2-NEXT:    [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0
@@ -22,20 +22,18 @@ define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) {
 ; SSE2-NEXT:    ret <2 x i64> [[V1]]
 ;
 ; SLM-LABEL: @loadext_2i8_to_2i64(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <2 x i8>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>
 ; SLM-NEXT:    ret <2 x i64> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_2i8_to_2i64(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <2 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>
 ; AVX-NEXT:    ret <2 x i64> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
-  %i0 = load i8, i8* %p0, align 1
-  %i1 = load i8, i8* %p1, align 1
+  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
+  %i0 = load i8, ptr %p0, align 1
+  %i1 = load i8, ptr %p1, align 1
   %x0 = zext i8 %i0 to i64
   %x1 = zext i8 %i1 to i64
   %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
@@ -43,32 +41,29 @@ define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) {
   ret <2 x i64> %v1
 }
 
-define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) {
+define <4 x i32> @loadext_4i8_to_4i32(ptr %p0) {
 ; SSE2-LABEL: @loadext_4i8_to_4i32(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
 ; SSE2-NEXT:    ret <4 x i32> [[TMP3]]
 ;
 ; SLM-LABEL: @loadext_4i8_to_4i32(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
 ; SLM-NEXT:    ret <4 x i32> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_4i8_to_4i32(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
 ; AVX-NEXT:    ret <4 x i32> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
-  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
-  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
-  %i0 = load i8, i8* %p0, align 1
-  %i1 = load i8, i8* %p1, align 1
-  %i2 = load i8, i8* %p2, align 1
-  %i3 = load i8, i8* %p3, align 1
+  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
+  %i0 = load i8, ptr %p0, align 1
+  %i1 = load i8, ptr %p1, align 1
+  %i2 = load i8, ptr %p2, align 1
+  %i3 = load i8, ptr %p3, align 1
   %x0 = zext i8 %i0 to i32
   %x1 = zext i8 %i1 to i32
   %x2 = zext i8 %i2 to i32
@@ -80,32 +75,29 @@ define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) {
   ret <4 x i32> %v3
 }
 
-define <4 x i64> @loadext_4i8_to_4i64(i8* %p0) {
+define <4 x i64> @loadext_4i8_to_4i64(ptr %p0) {
 ; SSE2-LABEL: @loadext_4i8_to_4i64(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
 ; SSE2-NEXT:    ret <4 x i64> [[TMP3]]
 ;
 ; SLM-LABEL: @loadext_4i8_to_4i64(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
 ; SLM-NEXT:    ret <4 x i64> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_4i8_to_4i64(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
 ; AVX-NEXT:    ret <4 x i64> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
-  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
-  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
-  %i0 = load i8, i8* %p0, align 1
-  %i1 = load i8, i8* %p1, align 1
-  %i2 = load i8, i8* %p2, align 1
-  %i3 = load i8, i8* %p3, align 1
+  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
+  %i0 = load i8, ptr %p0, align 1
+  %i1 = load i8, ptr %p1, align 1
+  %i2 = load i8, ptr %p2, align 1
+  %i3 = load i8, ptr %p3, align 1
   %x0 = zext i8 %i0 to i64
   %x1 = zext i8 %i1 to i64
   %x2 = zext i8 %i2 to i64
@@ -117,40 +109,37 @@ define <4 x i64> @loadext_4i8_to_4i64(i8* %p0) {
   ret <4 x i64> %v3
 }
 
-define <8 x i16> @loadext_8i8_to_8i16(i8* %p0) {
+define <8 x i16> @loadext_8i8_to_8i16(ptr %p0) {
 ; SSE2-LABEL: @loadext_8i8_to_8i16(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; SSE2-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
 ; SSE2-NEXT:    ret <8 x i16> [[TMP3]]
 ;
 ; SLM-LABEL: @loadext_8i8_to_8i16(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
 ; SLM-NEXT:    ret <8 x i16> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_8i8_to_8i16(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
 ; AVX-NEXT:    ret <8 x i16> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
-  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
-  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
-  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
-  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
-  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
-  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
-  %i0 = load i8, i8* %p0, align 1
-  %i1 = load i8, i8* %p1, align 1
-  %i2 = load i8, i8* %p2, align 1
-  %i3 = load i8, i8* %p3, align 1
-  %i4 = load i8, i8* %p4, align 1
-  %i5 = load i8, i8* %p5, align 1
-  %i6 = load i8, i8* %p6, align 1
-  %i7 = load i8, i8* %p7, align 1
+  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
+  %p4 = getelementptr inbounds i8, ptr %p0, i64 4
+  %p5 = getelementptr inbounds i8, ptr %p0, i64 5
+  %p6 = getelementptr inbounds i8, ptr %p0, i64 6
+  %p7 = getelementptr inbounds i8, ptr %p0, i64 7
+  %i0 = load i8, ptr %p0, align 1
+  %i1 = load i8, ptr %p1, align 1
+  %i2 = load i8, ptr %p2, align 1
+  %i3 = load i8, ptr %p3, align 1
+  %i4 = load i8, ptr %p4, align 1
+  %i5 = load i8, ptr %p5, align 1
+  %i6 = load i8, ptr %p6, align 1
+  %i7 = load i8, ptr %p7, align 1
   %x0 = zext i8 %i0 to i16
   %x1 = zext i8 %i1 to i16
   %x2 = zext i8 %i2 to i16
@@ -170,40 +159,37 @@ define <8 x i16> @loadext_8i8_to_8i16(i8* %p0) {
   ret <8 x i16> %v7
 }
 
-define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) {
+define <8 x i32> @loadext_8i8_to_8i32(ptr %p0) {
 ; SSE2-LABEL: @loadext_8i8_to_8i32(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; SSE2-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
 ; SSE2-NEXT:    ret <8 x i32> [[TMP3]]
 ;
 ; SLM-LABEL: @loadext_8i8_to_8i32(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
 ; SLM-NEXT:    ret <8 x i32> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_8i8_to_8i32(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
 ; AVX-NEXT:    ret <8 x i32> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
-  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
-  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
-  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
-  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
-  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
-  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
-  %i0 = load i8, i8* %p0, align 1
-  %i1 = load i8, i8* %p1, align 1
-  %i2 = load i8, i8* %p2, align 1
-  %i3 = load i8, i8* %p3, align 1
-  %i4 = load i8, i8* %p4, align 1
-  %i5 = load i8, i8* %p5, align 1
-  %i6 = load i8, i8* %p6, align 1
-  %i7 = load i8, i8* %p7, align 1
+  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
+  %p4 = getelementptr inbounds i8, ptr %p0, i64 4
+  %p5 = getelementptr inbounds i8, ptr %p0, i64 5
+  %p6 = getelementptr inbounds i8, ptr %p0, i64 6
+  %p7 = getelementptr inbounds i8, ptr %p0, i64 7
+  %i0 = load i8, ptr %p0, align 1
+  %i1 = load i8, ptr %p1, align 1
+  %i2 = load i8, ptr %p2, align 1
+  %i3 = load i8, ptr %p3, align 1
+  %i4 = load i8, ptr %p4, align 1
+  %i5 = load i8, ptr %p5, align 1
+  %i6 = load i8, ptr %p6, align 1
+  %i7 = load i8, ptr %p7, align 1
   %x0 = zext i8 %i0 to i32
   %x1 = zext i8 %i1 to i32
   %x2 = zext i8 %i2 to i32
@@ -223,56 +209,53 @@ define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) {
   ret <8 x i32> %v7
 }
 
-define <16 x i16> @loadext_16i8_to_16i16(i8* %p0) {
+define <16 x i16> @loadext_16i8_to_16i16(ptr %p0) {
 ; SSE2-LABEL: @loadext_16i8_to_16i16(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
+; SSE2-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
 ; SSE2-NEXT:    ret <16 x i16> [[TMP3]]
 ;
 ; SLM-LABEL: @loadext_16i8_to_16i16(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
 ; SLM-NEXT:    ret <16 x i16> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_16i8_to_16i16(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
 ; AVX-NEXT:    ret <16 x i16> [[TMP3]]
 ;
-  %p1  = getelementptr inbounds i8, i8* %p0, i64 1
-  %p2  = getelementptr inbounds i8, i8* %p0, i64 2
-  %p3  = getelementptr inbounds i8, i8* %p0, i64 3
-  %p4  = getelementptr inbounds i8, i8* %p0, i64 4
-  %p5  = getelementptr inbounds i8, i8* %p0, i64 5
-  %p6  = getelementptr inbounds i8, i8* %p0, i64 6
-  %p7  = getelementptr inbounds i8, i8* %p0, i64 7
-  %p8  = getelementptr inbounds i8, i8* %p0, i64 8
-  %p9  = getelementptr inbounds i8, i8* %p0, i64 9
-  %p10 = getelementptr inbounds i8, i8* %p0, i64 10
-  %p11 = getelementptr inbounds i8, i8* %p0, i64 11
-  %p12 = getelementptr inbounds i8, i8* %p0, i64 12
-  %p13 = getelementptr inbounds i8, i8* %p0, i64 13
-  %p14 = getelementptr inbounds i8, i8* %p0, i64 14
-  %p15 = getelementptr inbounds i8, i8* %p0, i64 15
-  %i0  = load i8, i8* %p0,  align 1
-  %i1  = load i8, i8* %p1,  align 1
-  %i2  = load i8, i8* %p2,  align 1
-  %i3  = load i8, i8* %p3,  align 1
-  %i4  = load i8, i8* %p4,  align 1
-  %i5  = load i8, i8* %p5,  align 1
-  %i6  = load i8, i8* %p6,  align 1
-  %i7  = load i8, i8* %p7,  align 1
-  %i8  = load i8, i8* %p8,  align 1
-  %i9  = load i8, i8* %p9,  align 1
-  %i10 = load i8, i8* %p10, align 1
-  %i11 = load i8, i8* %p11, align 1
-  %i12 = load i8, i8* %p12, align 1
-  %i13 = load i8, i8* %p13, align 1
-  %i14 = load i8, i8* %p14, align 1
-  %i15 = load i8, i8* %p15, align 1
+  %p1  = getelementptr inbounds i8, ptr %p0, i64 1
+  %p2  = getelementptr inbounds i8, ptr %p0, i64 2
+  %p3  = getelementptr inbounds i8, ptr %p0, i64 3
+  %p4  = getelementptr inbounds i8, ptr %p0, i64 4
+  %p5  = getelementptr inbounds i8, ptr %p0, i64 5
+  %p6  = getelementptr inbounds i8, ptr %p0, i64 6
+  %p7  = getelementptr inbounds i8, ptr %p0, i64 7
+  %p8  = getelementptr inbounds i8, ptr %p0, i64 8
+  %p9  = getelementptr inbounds i8, ptr %p0, i64 9
+  %p10 = getelementptr inbounds i8, ptr %p0, i64 10
+  %p11 = getelementptr inbounds i8, ptr %p0, i64 11
+  %p12 = getelementptr inbounds i8, ptr %p0, i64 12
+  %p13 = getelementptr inbounds i8, ptr %p0, i64 13
+  %p14 = getelementptr inbounds i8, ptr %p0, i64 14
+  %p15 = getelementptr inbounds i8, ptr %p0, i64 15
+  %i0  = load i8, ptr %p0,  align 1
+  %i1  = load i8, ptr %p1,  align 1
+  %i2  = load i8, ptr %p2,  align 1
+  %i3  = load i8, ptr %p3,  align 1
+  %i4  = load i8, ptr %p4,  align 1
+  %i5  = load i8, ptr %p5,  align 1
+  %i6  = load i8, ptr %p6,  align 1
+  %i7  = load i8, ptr %p7,  align 1
+  %i8  = load i8, ptr %p8,  align 1
+  %i9  = load i8, ptr %p9,  align 1
+  %i10 = load i8, ptr %p10, align 1
+  %i11 = load i8, ptr %p11, align 1
+  %i12 = load i8, ptr %p12, align 1
+  %i13 = load i8, ptr %p13, align 1
+  %i14 = load i8, ptr %p14, align 1
+  %i15 = load i8, ptr %p15, align 1
   %x0  = zext i8 %i0  to i16
   %x1  = zext i8 %i1  to i16
   %x2  = zext i8 %i2  to i16
@@ -312,28 +295,25 @@ define <16 x i16> @loadext_16i8_to_16i16(i8* %p0) {
 ; vXi16
 ;
 
-define <2 x i64> @loadext_2i16_to_2i64(i16* %p0) {
+define <2 x i64> @loadext_2i16_to_2i64(ptr %p0) {
 ; SSE2-LABEL: @loadext_2i16_to_2i64(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
+; SSE2-NEXT:    [[TMP2:%.*]] = load <2 x i16>, ptr [[P0:%.*]], align 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
 ; SSE2-NEXT:    ret <2 x i64> [[TMP3]]
 ;
 ; SLM-LABEL: @loadext_2i16_to_2i64(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i16>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
 ; SLM-NEXT:    ret <2 x i64> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_2i16_to_2i64(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i16>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
 ; AVX-NEXT:    ret <2 x i64> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
-  %i0 = load i16, i16* %p0, align 1
-  %i1 = load i16, i16* %p1, align 1
+  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
+  %i0 = load i16, ptr %p0, align 1
+  %i1 = load i16, ptr %p1, align 1
   %x0 = zext i16 %i0 to i64
   %x1 = zext i16 %i1 to i64
   %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
@@ -341,32 +321,29 @@ define <2 x i64> @loadext_2i16_to_2i64(i16* %p0) {
   ret <2 x i64> %v1
 }
 
-define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) {
+define <4 x i32> @loadext_4i16_to_4i32(ptr %p0) {
 ; SSE2-LABEL: @loadext_4i16_to_4i32(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
 ; SSE2-NEXT:    ret <4 x i32> [[TMP3]]
 ;
 ; SLM-LABEL: @loadext_4i16_to_4i32(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
 ; SLM-NEXT:    ret <4 x i32> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_4i16_to_4i32(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
 ; AVX-NEXT:    ret <4 x i32> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
-  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
-  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
-  %i0 = load i16, i16* %p0, align 1
-  %i1 = load i16, i16* %p1, align 1
-  %i2 = load i16, i16* %p2, align 1
-  %i3 = load i16, i16* %p3, align 1
+  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
+  %i0 = load i16, ptr %p0, align 1
+  %i1 = load i16, ptr %p1, align 1
+  %i2 = load i16, ptr %p2, align 1
+  %i3 = load i16, ptr %p3, align 1
   %x0 = zext i16 %i0 to i32
   %x1 = zext i16 %i1 to i32
   %x2 = zext i16 %i2 to i32
@@ -378,32 +355,29 @@ define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) {
   ret <4 x i32> %v3
 }
 
-define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) {
+define <4 x i64> @loadext_4i16_to_4i64(ptr %p0) {
 ; SSE2-LABEL: @loadext_4i16_to_4i64(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
 ; SSE2-NEXT:    ret <4 x i64> [[TMP3]]
 ;
 ; SLM-LABEL: @loadext_4i16_to_4i64(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
 ; SLM-NEXT:    ret <4 x i64> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_4i16_to_4i64(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
 ; AVX-NEXT:    ret <4 x i64> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
-  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
-  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
-  %i0 = load i16, i16* %p0, align 1
-  %i1 = load i16, i16* %p1, align 1
-  %i2 = load i16, i16* %p2, align 1
-  %i3 = load i16, i16* %p3, align 1
+  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
+  %i0 = load i16, ptr %p0, align 1
+  %i1 = load i16, ptr %p1, align 1
+  %i2 = load i16, ptr %p2, align 1
+  %i3 = load i16, ptr %p3, align 1
   %x0 = zext i16 %i0 to i64
   %x1 = zext i16 %i1 to i64
   %x2 = zext i16 %i2 to i64
@@ -415,40 +389,37 @@ define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) {
   ret <4 x i64> %v3
 }
 
-define <8 x i32> @loadext_8i16_to_8i32(i16* %p0) {
+define <8 x i32> @loadext_8i16_to_8i32(ptr %p0) {
 ; SSE2-LABEL: @loadext_8i16_to_8i32(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1
+; SSE2-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
 ; SSE2-NEXT:    ret <8 x i32> [[TMP3]]
 ;
 ; SLM-LABEL: @loadext_8i16_to_8i32(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
 ; SLM-NEXT:    ret <8 x i32> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_8i16_to_8i32(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
 ; AVX-NEXT:    ret <8 x i32> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
-  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
-  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
-  %p4 = getelementptr inbounds i16, i16* %p0, i64 4
-  %p5 = getelementptr inbounds i16, i16* %p0, i64 5
-  %p6 = getelementptr inbounds i16, i16* %p0, i64 6
-  %p7 = getelementptr inbounds i16, i16* %p0, i64 7
-  %i0 = load i16, i16* %p0, align 1
-  %i1 = load i16, i16* %p1, align 1
-  %i2 = load i16, i16* %p2, align 1
-  %i3 = load i16, i16* %p3, align 1
-  %i4 = load i16, i16* %p4, align 1
-  %i5 = load i16, i16* %p5, align 1
-  %i6 = load i16, i16* %p6, align 1
-  %i7 = load i16, i16* %p7, align 1
+  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
+  %p4 = getelementptr inbounds i16, ptr %p0, i64 4
+  %p5 = getelementptr inbounds i16, ptr %p0, i64 5
+  %p6 = getelementptr inbounds i16, ptr %p0, i64 6
+  %p7 = getelementptr inbounds i16, ptr %p0, i64 7
+  %i0 = load i16, ptr %p0, align 1
+  %i1 = load i16, ptr %p1, align 1
+  %i2 = load i16, ptr %p2, align 1
+  %i3 = load i16, ptr %p3, align 1
+  %i4 = load i16, ptr %p4, align 1
+  %i5 = load i16, ptr %p5, align 1
+  %i6 = load i16, ptr %p6, align 1
+  %i7 = load i16, ptr %p7, align 1
   %x0 = zext i16 %i0 to i32
   %x1 = zext i16 %i1 to i32
   %x2 = zext i16 %i2 to i32
@@ -472,28 +443,25 @@ define <8 x i32> @loadext_8i16_to_8i32(i16* %p0) {
 ; vXi32
 ;
 
-define <2 x i64> @loadext_2i32_to_2i64(i32* %p0) {
+define <2 x i64> @loadext_2i32_to_2i64(ptr %p0) {
 ; SSE2-LABEL: @loadext_2i32_to_2i64(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
+; SSE2-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[P0:%.*]], align 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
 ; SSE2-NEXT:    ret <2 x i64> [[TMP3]]
 ;
 ; SLM-LABEL: @loadext_2i32_to_2i64(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
 ; SLM-NEXT:    ret <2 x i64> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_2i32_to_2i64(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
 ; AVX-NEXT:    ret <2 x i64> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i32, i32* %p0, i64 1
-  %i0 = load i32, i32* %p0, align 1
-  %i1 = load i32, i32* %p1, align 1
+  %p1 = getelementptr inbounds i32, ptr %p0, i64 1
+  %i0 = load i32, ptr %p0, align 1
+  %i1 = load i32, ptr %p1, align 1
   %x0 = zext i32 %i0 to i64
   %x1 = zext i32 %i1 to i64
   %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
@@ -501,32 +469,29 @@ define <2 x i64> @loadext_2i32_to_2i64(i32* %p0) {
   ret <2 x i64> %v1
 }
 
-define <4 x i64> @loadext_4i32_to_4i64(i32* %p0) {
+define <4 x i64> @loadext_4i32_to_4i64(ptr %p0) {
 ; SSE2-LABEL: @loadext_4i32_to_4i64(
-; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>*
-; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
+; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[P0:%.*]], align 1
 ; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
 ; SSE2-NEXT:    ret <4 x i64> [[TMP3]]
 ;
 ; SLM-LABEL: @loadext_4i32_to_4i64(
-; SLM-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>*
-; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
+; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[P0:%.*]], align 1
 ; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
 ; SLM-NEXT:    ret <4 x i64> [[TMP3]]
 ;
 ; AVX-LABEL: @loadext_4i32_to_4i64(
-; AVX-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>*
-; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[P0:%.*]], align 1
 ; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
 ; AVX-NEXT:    ret <4 x i64> [[TMP3]]
 ;
-  %p1 = getelementptr inbounds i32, i32* %p0, i64 1
-  %p2 = getelementptr inbounds i32, i32* %p0, i64 2
-  %p3 = getelementptr inbounds i32, i32* %p0, i64 3
-  %i0 = load i32, i32* %p0, align 1
-  %i1 = load i32, i32* %p1, align 1
-  %i2 = load i32, i32* %p2, align 1
-  %i3 = load i32, i32* %p3, align 1
+  %p1 = getelementptr inbounds i32, ptr %p0, i64 1
+  %p2 = getelementptr inbounds i32, ptr %p0, i64 2
+  %p3 = getelementptr inbounds i32, ptr %p0, i64 3
+  %i0 = load i32, ptr %p0, align 1
+  %i1 = load i32, ptr %p1, align 1
+  %i2 = load i32, ptr %p2, align 1
+  %i3 = load i32, ptr %p3, align 1
   %x0 = zext i32 %i0 to i64
   %x1 = zext i32 %i1 to i64
   %x2 = zext i32 %i2 to i64

diff  --git a/llvm/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll b/llvm/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll
index 86efe5ebd3487..8364934779349 100644
--- a/llvm/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll
+++ b/llvm/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll
@@ -5,34 +5,34 @@ target datalayout = "e-p:32:32:32-a0:0:32-n32-i1:8:32-i8:8:32-i16:16:32-i32:32:3
 target triple = "xcore"
 
 ; Simple 3-pair chain with loads and stores
-define void @test1(double* %a, double* %b, double* %c) {
+define void @test1(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[I0:%.*]] = load double, double* [[A:%.*]], align 8
-; CHECK-NEXT:    [[I1:%.*]] = load double, double* [[B:%.*]], align 8
+; CHECK-NEXT:    [[I0:%.*]] = load double, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[I1:%.*]] = load double, ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[I0]], [[I1]]
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
-; CHECK-NEXT:    [[I3:%.*]] = load double, double* [[ARRAYIDX3]], align 8
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
-; CHECK-NEXT:    [[I4:%.*]] = load double, double* [[ARRAYIDX4]], align 8
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[A]], i64 1
+; CHECK-NEXT:    [[I3:%.*]] = load double, ptr [[ARRAYIDX3]], align 8
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds double, ptr [[B]], i64 1
+; CHECK-NEXT:    [[I4:%.*]] = load double, ptr [[ARRAYIDX4]], align 8
 ; CHECK-NEXT:    [[MUL5:%.*]] = fmul double [[I3]], [[I4]]
-; CHECK-NEXT:    store double [[MUL]], double* [[C:%.*]], align 8
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[C]], i64 1
-; CHECK-NEXT:    store double [[MUL5]], double* [[ARRAYIDX5]], align 8
+; CHECK-NEXT:    store double [[MUL]], ptr [[C:%.*]], align 8
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[C]], i64 1
+; CHECK-NEXT:    store double [[MUL5]], ptr [[ARRAYIDX5]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %i0 = load double, double* %a, align 8
-  %i1 = load double, double* %b, align 8
+  %i0 = load double, ptr %a, align 8
+  %i1 = load double, ptr %b, align 8
   %mul = fmul double %i0, %i1
-  %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double, double* %arrayidx3, align 8
-  %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double, double* %arrayidx4, align 8
+  %arrayidx3 = getelementptr inbounds double, ptr %a, i64 1
+  %i3 = load double, ptr %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double, ptr %b, i64 1
+  %i4 = load double, ptr %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
-  store double %mul, double* %c, align 8
-  %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
-  store double %mul5, double* %arrayidx5, align 8
+  store double %mul, ptr %c, align 8
+  %arrayidx5 = getelementptr inbounds double, ptr %c, i64 1
+  store double %mul5, ptr %arrayidx5, align 8
   ret void
 }
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/int_sideeffect.ll b/llvm/test/Transforms/SLPVectorizer/int_sideeffect.ll
index 206034b723cea..e075e9291639b 100644
--- a/llvm/test/Transforms/SLPVectorizer/int_sideeffect.ll
+++ b/llvm/test/Transforms/SLPVectorizer/int_sideeffect.ll
@@ -5,60 +5,52 @@ declare void @llvm.sideeffect()
 
 ; SLP vectorization across a @llvm.sideeffect.
 
-define void @test_sideeffect(float* %p) {
+define void @test_sideeffect(ptr %p) {
 ; CHECK-LABEL: @test_sideeffect(
-; CHECK-NEXT:    [[P0:%.*]] = getelementptr float, float* [[P:%.*]], i64 0
 ; CHECK-NEXT:    call void @llvm.sideeffect()
 ; CHECK-NEXT:    call void @llvm.sideeffect()
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P0]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[P0]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[P]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %p0 = getelementptr float, float* %p, i64 0
-  %p1 = getelementptr float, float* %p, i64 1
-  %p2 = getelementptr float, float* %p, i64 2
-  %p3 = getelementptr float, float* %p, i64 3
-  %l0 = load float, float* %p0
-  %l1 = load float, float* %p1
-  %l2 = load float, float* %p2
+  %p1 = getelementptr float, ptr %p, i64 1
+  %p2 = getelementptr float, ptr %p, i64 2
+  %p3 = getelementptr float, ptr %p, i64 3
+  %l0 = load float, ptr %p
+  %l1 = load float, ptr %p1
+  %l2 = load float, ptr %p2
   call void @llvm.sideeffect()
-  %l3 = load float, float* %p3
-  store float %l0, float* %p0
+  %l3 = load float, ptr %p3
+  store float %l0, ptr %p
   call void @llvm.sideeffect()
-  store float %l1, float* %p1
-  store float %l2, float* %p2
-  store float %l3, float* %p3
+  store float %l1, ptr %p1
+  store float %l2, ptr %p2
+  store float %l3, ptr %p3
   ret void
 }
 
 declare void @foo()
 
-define void @test_inaccessiblememonly(float* %p) {
+define void @test_inaccessiblememonly(ptr %p) {
 ; CHECK-LABEL: @test_inaccessiblememonly(
-; CHECK-NEXT:    [[P0:%.*]] = getelementptr float, float* [[P:%.*]], i64 0
 ; CHECK-NEXT:    call void @foo() #[[ATTR1:[0-9]+]]
 ; CHECK-NEXT:    call void @foo() #[[ATTR1]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P0]] to <4 x float>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[P0]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[P]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %p0 = getelementptr float, float* %p, i64 0
-  %p1 = getelementptr float, float* %p, i64 1
-  %p2 = getelementptr float, float* %p, i64 2
-  %p3 = getelementptr float, float* %p, i64 3
-  %l0 = load float, float* %p0
-  %l1 = load float, float* %p1
-  %l2 = load float, float* %p2
+  %p1 = getelementptr float, ptr %p, i64 1
+  %p2 = getelementptr float, ptr %p, i64 2
+  %p3 = getelementptr float, ptr %p, i64 3
+  %l0 = load float, ptr %p
+  %l1 = load float, ptr %p1
+  %l2 = load float, ptr %p2
   call void @foo() inaccessiblememonly
-  %l3 = load float, float* %p3
-  store float %l0, float* %p0
+  %l3 = load float, ptr %p3
+  store float %l0, ptr %p
   call void @foo() inaccessiblememonly
-  store float %l1, float* %p1
-  store float %l2, float* %p2
-  store float %l3, float* %p3
+  store float %l1, ptr %p1
+  store float %l2, ptr %p2
+  store float %l3, ptr %p3
   ret void
 }

diff  --git a/llvm/test/Transforms/SLPVectorizer/reschedule.ll b/llvm/test/Transforms/SLPVectorizer/reschedule.ll
index 9b0508c5fb3ad..825329b5af97e 100644
--- a/llvm/test/Transforms/SLPVectorizer/reschedule.ll
+++ b/llvm/test/Transforms/SLPVectorizer/reschedule.ll
@@ -11,9 +11,9 @@ define void @test() {
 ; CHECK-NEXT:    [[MUL701:%.*]] = fmul double 0.000000e+00, 0.000000e+00
 ; CHECK-NEXT:    [[MUL703:%.*]] = fmul double 0.000000e+00, 0.000000e+00
 ; CHECK-NEXT:    [[I4:%.*]] = call double @llvm.fmuladd.f64(double [[MUL701]], double 0.000000e+00, double [[MUL703]])
-; CHECK-NEXT:    store double [[I4]], double* null, align 8
-; CHECK-NEXT:    [[I5:%.*]] = load double, double* null, align 8
-; CHECK-NEXT:    [[I6:%.*]] = load double, double* null, align 8
+; CHECK-NEXT:    store double [[I4]], ptr null, align 8
+; CHECK-NEXT:    [[I5:%.*]] = load double, ptr null, align 8
+; CHECK-NEXT:    [[I6:%.*]] = load double, ptr null, align 8
 ; CHECK-NEXT:    [[MUL746:%.*]] = fmul double 0.000000e+00, [[I6]]
 ; CHECK-NEXT:    [[MUL747:%.*]] = fmul double 0.000000e+00, [[I5]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[MUL746]], i32 0
@@ -37,9 +37,9 @@ for.body602:
   %i2 = call double @llvm.fmuladd.f64(double %i1, double 0.000000e+00, double 0.000000e+00)
   %i3 = call double @llvm.fmuladd.f64(double 0.000000e+00, double %i2, double 0.000000e+00)
   %i4 = call double @llvm.fmuladd.f64(double %mul701, double 0.000000e+00, double %mul703)
-  store double %i4, double* null, align 8
-  %i5 = load double, double* null, align 8
-  %i6 = load double, double* null, align 8
+  store double %i4, ptr null, align 8
+  %i5 = load double, ptr null, align 8
+  %i6 = load double, ptr null, align 8
   %mul746 = fmul double 0.000000e+00, %i6
   %mul747 = fmul double 0.000000e+00, %i5
   %i7 = call double @llvm.fmuladd.f64(double 0.000000e+00, double 0.000000e+00, double %mul746)

diff  --git a/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll b/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll
index 7b7c65aa7a41b..403546f67c8a1 100644
--- a/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll
+++ b/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll
@@ -125,7 +125,7 @@ define void @phi_float32(half %hval, float %fval) {
 ; MAX32-NEXT:    [[PHI30:%.*]] = phi float [ [[I63]], [[BB3]] ], [ [[FVAL]], [[BB4]] ], [ [[FVAL]], [[BB5]] ], [ [[FVAL]], [[BB1]] ]
 ; MAX32-NEXT:    [[PHI31:%.*]] = phi float [ [[I65]], [[BB3]] ], [ [[FVAL]], [[BB4]] ], [ [[I65]], [[BB5]] ], [ [[I65]], [[BB1]] ]
 ; MAX32-NEXT:    [[PHI32:%.*]] = phi float [ [[I67]], [[BB3]] ], [ [[I67]], [[BB4]] ], [ [[FVAL]], [[BB5]] ], [ [[I67]], [[BB1]] ]
-; MAX32-NEXT:    store float [[PHI31]], float* undef, align 4
+; MAX32-NEXT:    store float [[PHI31]], ptr undef, align 4
 ; MAX32-NEXT:    ret void
 ;
 ; MAX256-LABEL: @phi_float32(
@@ -171,7 +171,7 @@ define void @phi_float32(half %hval, float %fval) {
 ; MAX256-NEXT:    [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], [[BB3]] ], [ [[TMP12]], [[BB4]] ], [ [[SHUFFLE12]], [[BB5]] ], [ [[TMP12]], [[BB1]] ]
 ; MAX256-NEXT:    [[TMP16:%.*]] = phi <8 x float> [ [[TMP3]], [[BB3]] ], [ [[TMP3]], [[BB4]] ], [ [[TMP3]], [[BB5]] ], [ [[SHUFFLE12]], [[BB1]] ]
 ; MAX256-NEXT:    [[TMP17:%.*]] = extractelement <8 x float> [[TMP14]], i32 7
-; MAX256-NEXT:    store float [[TMP17]], float* undef, align 4
+; MAX256-NEXT:    store float [[TMP17]], ptr undef, align 4
 ; MAX256-NEXT:    ret void
 ;
 ; MAX1024-LABEL: @phi_float32(
@@ -217,7 +217,7 @@ define void @phi_float32(half %hval, float %fval) {
 ; MAX1024-NEXT:    [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], [[BB3]] ], [ [[TMP12]], [[BB4]] ], [ [[SHUFFLE12]], [[BB5]] ], [ [[TMP12]], [[BB1]] ]
 ; MAX1024-NEXT:    [[TMP16:%.*]] = phi <8 x float> [ [[TMP3]], [[BB3]] ], [ [[TMP3]], [[BB4]] ], [ [[TMP3]], [[BB5]] ], [ [[SHUFFLE12]], [[BB1]] ]
 ; MAX1024-NEXT:    [[TMP17:%.*]] = extractelement <8 x float> [[TMP14]], i32 7
-; MAX1024-NEXT:    store float [[TMP17]], float* undef, align 4
+; MAX1024-NEXT:    store float [[TMP17]], ptr undef, align 4
 ; MAX1024-NEXT:    ret void
 ;
 bb:
@@ -340,6 +340,6 @@ bb2:
   %phi30 = phi float [ %i63, %bb3 ], [ %fval, %bb4 ], [ %fval, %bb5 ], [ %fval, %bb1 ]
   %phi31 = phi float [ %i65, %bb3 ], [ %fval, %bb4 ], [ %i65, %bb5 ], [ %i65, %bb1 ]
   %phi32 = phi float [ %i67, %bb3 ], [ %i67, %bb4 ], [ %fval, %bb5 ], [ %i67, %bb1 ]
-  store float %phi31, float* undef
+  store float %phi31, ptr undef
   ret void
 }

diff  --git a/llvm/test/Transforms/SLPVectorizer/slp-max-reg-size.ll b/llvm/test/Transforms/SLPVectorizer/slp-max-reg-size.ll
index 19e4584062234..354be6ec84262 100644
--- a/llvm/test/Transforms/SLPVectorizer/slp-max-reg-size.ll
+++ b/llvm/test/Transforms/SLPVectorizer/slp-max-reg-size.ll
@@ -15,61 +15,61 @@
 
 define void @foo() {
 ; CHECK-VF8-160-LABEL: @foo(
-; CHECK-VF8-160-NEXT:    store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* bitcast ([8 x i32]* @X to <4 x i32>*), align 1
-; CHECK-VF8-160-NEXT:    store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 4) to <4 x i32>*), align 1
+; CHECK-VF8-160-NEXT:    store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr @X, align 1
+; CHECK-VF8-160-NEXT:    store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 4), align 1
 ; CHECK-VF8-160-NEXT:    ret void
 ;
 ; CHECK-VF4-160-LABEL: @foo(
-; CHECK-VF4-160-NEXT:    store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* bitcast ([8 x i32]* @X to <4 x i32>*), align 1
-; CHECK-VF4-160-NEXT:    store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 4) to <4 x i32>*), align 1
+; CHECK-VF4-160-NEXT:    store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr @X, align 1
+; CHECK-VF4-160-NEXT:    store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 4), align 1
 ; CHECK-VF4-160-NEXT:    ret void
 ;
 ; CHECK-VF2-160-LABEL: @foo(
-; CHECK-VF2-160-NEXT:    store <2 x i32> <i32 1, i32 2>, <2 x i32>* bitcast ([8 x i32]* @X to <2 x i32>*), align 1
-; CHECK-VF2-160-NEXT:    store <2 x i32> <i32 3, i32 4>, <2 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 2) to <2 x i32>*), align 1
-; CHECK-VF2-160-NEXT:    store <2 x i32> <i32 5, i32 6>, <2 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 4) to <2 x i32>*), align 1
-; CHECK-VF2-160-NEXT:    store <2 x i32> <i32 7, i32 8>, <2 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 6) to <2 x i32>*), align 1
+; CHECK-VF2-160-NEXT:    store <2 x i32> <i32 1, i32 2>, ptr @X, align 1
+; CHECK-VF2-160-NEXT:    store <2 x i32> <i32 3, i32 4>, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 2), align 1
+; CHECK-VF2-160-NEXT:    store <2 x i32> <i32 5, i32 6>, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 4), align 1
+; CHECK-VF2-160-NEXT:    store <2 x i32> <i32 7, i32 8>, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 6), align 1
 ; CHECK-VF2-160-NEXT:    ret void
 ;
 ; CHECK-VF8-128-LABEL: @foo(
-; CHECK-VF8-128-NEXT:    store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* bitcast ([8 x i32]* @X to <4 x i32>*), align 1
-; CHECK-VF8-128-NEXT:    store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 4) to <4 x i32>*), align 1
+; CHECK-VF8-128-NEXT:    store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr @X, align 1
+; CHECK-VF8-128-NEXT:    store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 4), align 1
 ; CHECK-VF8-128-NEXT:    ret void
 ;
 ; CHECK-VF4-128-LABEL: @foo(
-; CHECK-VF4-128-NEXT:    store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* bitcast ([8 x i32]* @X to <4 x i32>*), align 1
-; CHECK-VF4-128-NEXT:    store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 4) to <4 x i32>*), align 1
+; CHECK-VF4-128-NEXT:    store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr @X, align 1
+; CHECK-VF4-128-NEXT:    store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 4), align 1
 ; CHECK-VF4-128-NEXT:    ret void
 ;
 ; CHECK-VF2-128-LABEL: @foo(
-; CHECK-VF2-128-NEXT:    store <2 x i32> <i32 1, i32 2>, <2 x i32>* bitcast ([8 x i32]* @X to <2 x i32>*), align 1
-; CHECK-VF2-128-NEXT:    store <2 x i32> <i32 3, i32 4>, <2 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 2) to <2 x i32>*), align 1
-; CHECK-VF2-128-NEXT:    store <2 x i32> <i32 5, i32 6>, <2 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 4) to <2 x i32>*), align 1
-; CHECK-VF2-128-NEXT:    store <2 x i32> <i32 7, i32 8>, <2 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 6) to <2 x i32>*), align 1
+; CHECK-VF2-128-NEXT:    store <2 x i32> <i32 1, i32 2>, ptr @X, align 1
+; CHECK-VF2-128-NEXT:    store <2 x i32> <i32 3, i32 4>, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 2), align 1
+; CHECK-VF2-128-NEXT:    store <2 x i32> <i32 5, i32 6>, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 4), align 1
+; CHECK-VF2-128-NEXT:    store <2 x i32> <i32 7, i32 8>, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 6), align 1
 ; CHECK-VF2-128-NEXT:    ret void
 ;
 ; CHECK-VF8-256-LABEL: @foo(
-; CHECK-VF8-256-NEXT:    store <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <8 x i32>* bitcast ([8 x i32]* @X to <8 x i32>*), align 1
+; CHECK-VF8-256-NEXT:    store <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, ptr @X, align 1
 ; CHECK-VF8-256-NEXT:    ret void
 ;
 ; CHECK-VF2-128-128-LABEL: @foo(
-; CHECK-VF2-128-128-NEXT:    store i32 1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 0), align 1
-; CHECK-VF2-128-128-NEXT:    store i32 2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 1), align 1
-; CHECK-VF2-128-128-NEXT:    store i32 3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 2), align 1
-; CHECK-VF2-128-128-NEXT:    store i32 4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 3), align 1
-; CHECK-VF2-128-128-NEXT:    store i32 5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 4), align 1
-; CHECK-VF2-128-128-NEXT:    store i32 6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 5), align 1
-; CHECK-VF2-128-128-NEXT:    store i32 7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 6), align 1
-; CHECK-VF2-128-128-NEXT:    store i32 8, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 7), align 1
+; CHECK-VF2-128-128-NEXT:    store i32 1, ptr @X, align 1
+; CHECK-VF2-128-128-NEXT:    store i32 2, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 1), align 1
+; CHECK-VF2-128-128-NEXT:    store i32 3, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 2), align 1
+; CHECK-VF2-128-128-NEXT:    store i32 4, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 3), align 1
+; CHECK-VF2-128-128-NEXT:    store i32 5, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 4), align 1
+; CHECK-VF2-128-128-NEXT:    store i32 6, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 5), align 1
+; CHECK-VF2-128-128-NEXT:    store i32 7, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 6), align 1
+; CHECK-VF2-128-128-NEXT:    store i32 8, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 7), align 1
 ; CHECK-VF2-128-128-NEXT:    ret void
 ;
-  store i32 1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 0), align 1
-  store i32 2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 1), align 1
-  store i32 3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 2), align 1
-  store i32 4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 3), align 1
-  store i32 5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 4), align 1
-  store i32 6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 5), align 1
-  store i32 7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 6), align 1
-  store i32 8, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 7), align 1
+  store i32 1, ptr @X, align 1
+  store i32 2, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 1), align 1
+  store i32 3, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 2), align 1
+  store i32 4, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 3), align 1
+  store i32 5, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 4), align 1
+  store i32 6, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 5), align 1
+  store i32 7, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 6), align 1
+  store i32 8, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 7), align 1
   ret void
 }

diff  --git a/llvm/test/Transforms/SLPVectorizer/vectorizable-functions-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/vectorizable-functions-inseltpoison.ll
index a17b49ad1b640..1ef78bb1c81ba 100644
--- a/llvm/test/Transforms/SLPVectorizer/vectorizable-functions-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/vectorizable-functions-inseltpoison.ll
@@ -4,15 +4,15 @@
 declare float @memread(float) readonly nounwind willreturn #0
 declare <4 x float> @vmemread(<4 x float>)
 
-define <4 x float> @memread_4x(<4 x float>* %a) {
+define <4 x float> @memread_4x(ptr %a) {
 ; CHECK-LABEL: @memread_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vmemread(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @memread(float %vecext) #0
   %vecins = insertelement <4 x float> poison, float %1, i32 0
@@ -31,10 +31,10 @@ entry:
 declare float @memwrite(float) nounwind willreturn #1
 declare <4 x float> @vmemwrite(<4 x float>)
 
-define <4 x float> @memwrite_4x(<4 x float>* %a) {
+define <4 x float> @memwrite_4x(ptr %a) {
 ; CHECK-LABEL: @memwrite_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @memwrite(float [[VECEXT]]) #[[ATTR2:[0-9]+]]
 ; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
@@ -50,7 +50,7 @@ define <4 x float> @memwrite_4x(<4 x float>* %a) {
 ; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @memwrite(float %vecext) #1
   %vecins = insertelement <4 x float> poison, float %1, i32 0

diff --git a/llvm/test/Transforms/SLPVectorizer/vectorizable-functions.ll b/llvm/test/Transforms/SLPVectorizer/vectorizable-functions.ll
index 678c1eb2d7448..04a297fdfb0c5 100644
--- a/llvm/test/Transforms/SLPVectorizer/vectorizable-functions.ll
+++ b/llvm/test/Transforms/SLPVectorizer/vectorizable-functions.ll
@@ -4,15 +4,15 @@
 declare float @memread(float) readonly nounwind willreturn #0
 declare <4 x float> @vmemread(<4 x float>)
 
-define <4 x float> @memread_4x(<4 x float>* %a) {
+define <4 x float> @memread_4x(ptr %a) {
 ; CHECK-LABEL: @memread_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vmemread(<4 x float> [[TMP0]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @memread(float %vecext) #0
   %vecins = insertelement <4 x float> undef, float %1, i32 0
@@ -31,10 +31,10 @@ entry:
 declare float @memwrite(float) nounwind willreturn #1
 declare <4 x float> @vmemwrite(<4 x float>)
 
-define <4 x float> @memwrite_4x(<4 x float>* %a) {
+define <4 x float> @memwrite_4x(ptr %a) {
 ; CHECK-LABEL: @memwrite_4x(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
 ; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @memwrite(float [[VECEXT]]) #[[ATTR2:[0-9]+]]
 ; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
@@ -50,7 +50,7 @@ define <4 x float> @memwrite_4x(<4 x float>* %a) {
 ; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
-  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %0 = load <4 x float>, ptr %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
   %1 = tail call fast float @memwrite(float %vecext) #1
   %vecins = insertelement <4 x float> undef, float %1, i32 0

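A minimal sketch of the rewrite pattern these hunks apply, drawn from the @X hunk above (illustrative only, not part of the commit):

  ; Typed-pointer form (before): the pointee type is spelled twice, and
  ; even element 0 needs an explicit all-zero constant GEP.
  store i32 1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @X, i16 0, i16 0), align 1

  ; Opaque-pointer form (after): every pointer is just "ptr", the source
  ; element type survives only as the first GEP operand, and the all-zero
  ; GEP to element 0 constant-folds to the bare global.
  store i32 1, ptr @X, align 1
  store i32 2, ptr getelementptr inbounds ([8 x i32], ptr @X, i16 0, i16 1), align 1

The same mechanical substitution covers the function signatures and loads in the hunks above, e.g. "<4 x float>* %a" becoming "ptr %a" with the loaded type still spelled on the load itself: "load <4 x float>, ptr %a, align 16".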